From 35ea1c9256ee4b82b0fa68f1b51e30128547f5b9 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Tue, 13 Dec 2016 19:41:52 +0000 Subject: MAINT: retranspile lapack from the supposed source The source used is http://archive.debian.org/debian/pool/main/l/lapack3/lapack3_3.0.20000531a.orig.tar.gz. Originally this was done with a patched f2c, but if the patch isn't provided in the source tree, there's no sensible way to use it --- numpy/linalg/lapack_lite/blas_lite.c | 13492 +---- numpy/linalg/lapack_lite/dlapack_lite.c | 85031 ++++-------------------------- numpy/linalg/lapack_lite/zlapack_lite.c | 4778 +- 3 files changed, 13492 insertions(+), 89809 deletions(-) diff --git a/numpy/linalg/lapack_lite/blas_lite.c b/numpy/linalg/lapack_lite/blas_lite.c index 3ac680167..87322236c 100644 --- a/numpy/linalg/lapack_lite/blas_lite.c +++ b/numpy/linalg/lapack_lite/blas_lite.c @@ -20,41 +20,36 @@ extern doublereal dlapy2_(doublereal *x, doublereal *y); /* Table of constant values */ -static complex c_b21 = {1.f,0.f}; static integer c__1 = 1; -static doublecomplex c_b1077 = {1.,0.}; +static doublecomplex c_b359 = {1.,0.}; -/* Subroutine */ int caxpy_(integer *n, complex *ca, complex *cx, integer * - incx, complex *cy, integer *incy) +/* Subroutine */ int daxpy_(integer *n, doublereal *da, doublereal *dx, + integer *incx, doublereal *dy, integer *incy) { /* System generated locals */ - integer i__1, i__2, i__3, i__4; - real r__1, r__2; - complex q__1, q__2; - - /* Builtin functions */ - double r_imag(complex *); + integer i__1; /* Local variables */ - static integer i__, ix, iy; + static integer i__, m, ix, iy, mp1; /* constant times a vector plus a vector. + uses unrolled loops for increments equal to one. jack dongarra, linpack, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) */ /* Parameter adjustments */ - --cy; - --cx; + --dy; + --dx; /* Function Body */ if (*n <= 0) { return 0; } - if ((r__1 = ca->r, dabs(r__1)) + (r__2 = r_imag(ca), dabs(r__2)) == 0.f) { + if (*da == 0.) { return 0; } if (*incx == 1 && *incy == 1) { @@ -76,56 +71,86 @@ static doublecomplex c_b1077 = {1.,0.}; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - i__4 = ix; - q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[ - i__4].i + ca->i * cx[i__4].r; - q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i; - cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + dy[iy] += *da * dx[ix]; ix += *incx; iy += *incy; /* L10: */ } return 0; -/* code for both increments equal to 1 */ +/* + code for both increments equal to 1 + + + clean-up loop +*/ L20: - i__1 = *n; + m = *n % 4; + if (m == 0) { + goto L40; + } + i__1 = m; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - i__4 = i__; - q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[ - i__4].i + ca->i * cx[i__4].r; - q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i; - cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + dy[i__] += *da * dx[i__]; /* L30: */ + } + if (*n < 4) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 4) { + dy[i__] += *da * dx[i__]; + dy[i__ + 1] += *da * dx[i__ + 1]; + dy[i__ + 2] += *da * dx[i__ + 2]; + dy[i__ + 3] += *da * dx[i__ + 3]; +/* L50: */ } return 0; -} /* caxpy_ */ +} /* daxpy_ */ -/* Subroutine */ int ccopy_(integer *n, complex *cx, integer *incx, complex * - cy, integer *incy) +doublereal dcabs1_(doublecomplex *z__) { /* System generated locals */ - integer i__1, i__2, i__3; + doublereal ret_val; + static doublecomplex equiv_0[1]; /* Local variables */ - static integer i__, ix, iy; +#define t ((doublereal *)equiv_0) +#define zz (equiv_0) + + zz->r = z__->r, zz->i = z__->i; + ret_val = abs(t[0]) + abs(t[1]); 
+ return ret_val; +} /* dcabs1_ */ + +#undef zz +#undef t + + +/* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; /* copies a vector, x, to a vector, y. + uses unrolled loops for increments equal to one. jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) */ /* Parameter adjustments */ - --cy; - --cx; + --dy; + --dx; /* Function Body */ if (*n <= 0) { @@ -150,138 +175,78 @@ L20: } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = ix; - cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i; + dy[iy] = dx[ix]; ix += *incx; iy += *incy; /* L10: */ } return 0; -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i; -/* L30: */ - } - return 0; -} /* ccopy_ */ - -/* Complex */ VOID cdotc_(complex * ret_val, integer *n, complex *cx, integer - *incx, complex *cy, integer *incy) -{ - /* System generated locals */ - integer i__1, i__2; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, ix, iy; - static complex ctemp; - - /* - forms the dot product of two vectors, conjugating the first - vector. - jack dongarra, linpack, 3/11/78. 
- modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cy; - --cx; + code for both increments equal to 1 - /* Function Body */ - ctemp.r = 0.f, ctemp.i = 0.f; - ret_val->r = 0.f, ret_val->i = 0.f; - if (*n <= 0) { - return ; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } -/* - code for unequal increments or equal increments - not equal to 1 + clean-up loop */ - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; +L20: + m = *n % 7; + if (m == 0) { + goto L40; } - i__1 = *n; + i__1 = m; for (i__ = 1; i__ <= i__1; ++i__) { - r_cnjg(&q__3, &cx[ix]); - i__2 = iy; - q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * - cy[i__2].i + q__3.i * cy[i__2].r; - q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; - ix += *incx; - iy += *incy; -/* L10: */ + dy[i__] = dx[i__]; +/* L30: */ } - ret_val->r = ctemp.r, ret_val->i = ctemp.i; - return ; - -/* code for both increments equal to 1 */ - -L20: + if (*n < 7) { + return 0; + } +L40: + mp1 = m + 1; i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - r_cnjg(&q__3, &cx[i__]); - i__2 = i__; - q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * - cy[i__2].i + q__3.i * cy[i__2].r; - q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; -/* L30: */ + for (i__ = mp1; i__ <= i__1; i__ += 7) { + dy[i__] = dx[i__]; + dy[i__ + 1] = dx[i__ + 1]; + dy[i__ + 2] = dx[i__ + 2]; + dy[i__ + 3] = dx[i__ + 3]; + dy[i__ + 4] = dx[i__ + 4]; + dy[i__ + 5] = dx[i__ + 5]; + dy[i__ + 6] = dx[i__ + 6]; +/* L50: */ } - ret_val->r = ctemp.r, ret_val->i = ctemp.i; - return ; -} /* cdotc_ */ + return 0; +} /* dcopy_ */ -/* Complex */ VOID cdotu_(complex * ret_val, integer *n, complex *cx, integer - *incx, complex *cy, integer *incy) +doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, + integer 
*incy) { /* System generated locals */ - integer i__1, i__2, i__3; - complex q__1, q__2; + integer i__1; + doublereal ret_val; /* Local variables */ - static integer i__, ix, iy; - static complex ctemp; + static integer i__, m, ix, iy, mp1; + static doublereal dtemp; /* forms the dot product of two vectors. + uses unrolled loops for increments equal to one. jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) */ /* Parameter adjustments */ - --cy; - --cx; + --dy; + --dx; /* Function Body */ - ctemp.r = 0.f, ctemp.i = 0.f; - ret_val->r = 0.f, ret_val->i = 0.f; + ret_val = 0.; + dtemp = 0.; if (*n <= 0) { - return ; + return ret_val; } if (*incx == 1 && *incy == 1) { goto L20; @@ -302,53 +267,61 @@ L20: } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ix; - i__3 = iy; - q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = - cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r; - q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; + dtemp += dx[ix] * dy[iy]; ix += *incx; iy += *incy; /* L10: */ } - ret_val->r = ctemp.r, ret_val->i = ctemp.i; - return ; + ret_val = dtemp; + return ret_val; -/* code for both increments equal to 1 */ +/* + code for both increments equal to 1 + + + clean-up loop +*/ L20: - i__1 = *n; + m = *n % 5; + if (m == 0) { + goto L40; + } + i__1 = m; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = - cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r; - q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; + dtemp += dx[i__] * dy[i__]; /* L30: */ } - ret_val->r = ctemp.r, ret_val->i = ctemp.i; - return ; -} /* cdotu_ */ + if (*n < 5) { + goto L60; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 5) { + dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[ + i__ + 2] * dy[i__ + 2] + 
dx[i__ + 3] * dy[i__ + 3] + dx[i__ + + 4] * dy[i__ + 4]; +/* L50: */ + } +L60: + ret_val = dtemp; + return ret_val; +} /* ddot_ */ -/* Subroutine */ int cgemm_(char *transa, char *transb, integer *m, integer * - n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, - integer *ldb, complex *beta, complex *c__, integer *ldc) +/* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublereal *alpha, doublereal *a, integer *lda, + doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, + integer *ldc) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3, i__4, i__5, i__6; - complex q__1, q__2, q__3, q__4; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); + i__3; /* Local variables */ static integer i__, j, l, info; static logical nota, notb; - static complex temp; - static logical conja, conjb; + static doublereal temp; static integer ncola; extern logical lsame_(char *, char *); static integer nrowa, nrowb; @@ -359,13 +332,13 @@ L20: Purpose ======= - CGEMM performs one of the matrix-matrix operations + DGEMM performs one of the matrix-matrix operations C := alpha*op( A )*op( B ) + beta*C, where op( X ) is one of - op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), + op( X ) = X or op( X ) = X', alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. @@ -381,7 +354,7 @@ L20: TRANSA = 'T' or 't', op( A ) = A'. - TRANSA = 'C' or 'c', op( A ) = conjg( A' ). + TRANSA = 'C' or 'c', op( A ) = A'. Unchanged on exit. @@ -393,7 +366,7 @@ L20: TRANSB = 'T' or 't', op( B ) = B'. - TRANSB = 'C' or 'c', op( B ) = conjg( B' ). + TRANSB = 'C' or 'c', op( B ) = B'. Unchanged on exit. @@ -414,11 +387,11 @@ L20: be at least zero. Unchanged on exit. - ALPHA - COMPLEX . + ALPHA - DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. Unchanged on exit. 
- A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is + A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is k when TRANSA = 'N' or 'n', and is m otherwise. Before entry with TRANSA = 'N' or 'n', the leading m by k part of the array A must contain the matrix A, otherwise @@ -433,7 +406,7 @@ L20: least max( 1, k ). Unchanged on exit. - B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is + B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is n when TRANSB = 'N' or 'n', and is k otherwise. Before entry with TRANSB = 'N' or 'n', the leading k by n part of the array B must contain the matrix B, otherwise @@ -448,12 +421,12 @@ L20: least max( 1, n ). Unchanged on exit. - BETA - COMPLEX . + BETA - DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. Unchanged on exit. - C - COMPLEX array of DIMENSION ( LDC, n ). + C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. @@ -477,10 +450,8 @@ L20: Set NOTA and NOTB as true if A and B respectively are not - conjugated or transposed, set CONJA and CONJB as true if A and - B respectively are to be transposed but not conjugated and set - NROWA, NCOLA and NROWB as the number of rows and columns of A - and the number of rows of B respectively. + transposed and set NROWA, NCOLA and NROWB as the number of rows + and columns of A and the number of rows of B respectively. */ /* Parameter adjustments */ @@ -497,8 +468,6 @@ L20: /* Function Body */ nota = lsame_(transa, "N"); notb = lsame_(transb, "N"); - conja = lsame_(transa, "C"); - conjb = lsame_(transb, "C"); if (nota) { nrowa = *m; ncola = *k; @@ -515,9 +484,11 @@ L20: /* Test the input parameters. */ info = 0; - if (! nota && ! conja && ! lsame_(transa, "T")) { + if (! nota && ! lsame_(transa, "C") && ! 
lsame_( + transa, "T")) { info = 1; - } else if (! notb && ! conjb && ! lsame_(transb, "T")) { + } else if (! notb && ! lsame_(transb, "C") && ! + lsame_(transb, "T")) { info = 2; } else if (*m < 0) { info = 3; @@ -533,27 +504,25 @@ L20: info = 13; } if (info != 0) { - xerbla_("CGEMM ", &info); + xerbla_("DGEMM ", &info); return 0; } /* Quick return if possible. */ - if (((*m == 0) || (*n == 0)) || (((alpha->r == 0.f && alpha->i == 0.f) || - (*k == 0)) && (beta->r == 1.f && beta->i == 0.f))) { + if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { return 0; } -/* And when alpha.eq.zero. */ +/* And if alpha.eq.zero. */ - if (alpha->r == 0.f && alpha->i == 0.f) { - if (beta->r == 0.f && beta->i == 0.f) { + if (*alpha == 0.) { + if (*beta == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; + c__[i__ + j * c_dim1] = 0.; /* L10: */ } /* L20: */ @@ -563,12 +532,7 @@ L20: for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, - q__1.i = beta->r * c__[i__4].i + beta->i * c__[ - i__4].r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L30: */ } /* L40: */ @@ -586,45 +550,27 @@ L20: i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (beta->r == 0.f && beta->i == 0.f) { + if (*beta == 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; + c__[i__ + j * c_dim1] = 0.; /* L50: */ } - } else if ((beta->r != 1.f) || (beta->i != 0.f)) { + } else if (*beta != 1.) 
{ i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__1.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L60: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { - i__3 = l + j * b_dim1; - if ((b[i__3].r != 0.f) || (b[i__3].i != 0.f)) { - i__3 = l + j * b_dim1; - q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, - q__1.i = alpha->r * b[i__3].i + alpha->i * b[ - i__3].r; - temp.r = q__1.r, temp.i = q__1.i; + if (b[l + j * b_dim1] != 0.) { + temp = *alpha * b[l + j * b_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - q__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] - .i + q__2.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; /* L70: */ } } @@ -632,324 +578,65 @@ L20: } /* L90: */ } - } else if (conja) { + } else { -/* Form C := alpha*conjg( A' )*B + beta*C. 
*/ +/* Form C := alpha*A'*B + beta*C */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; + temp = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - i__4 = l + j * b_dim1; - q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, - q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] - .r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; + temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; /* L100: */ } - if (beta->r == 0.f && beta->i == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; } else { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, - q__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; } /* L110: */ } /* L120: */ } - } else { - -/* Form C := alpha*A'*B + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - i__4 = l + i__ * a_dim1; - i__5 = l + j * b_dim1; - q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] - .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4] - .i * b[i__5].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L130: */ - } - if (beta->r == 0.f && beta->i == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = 
q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, - q__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } -/* L140: */ - } -/* L150: */ - } } - } else if (nota) { - if (conjb) { - -/* Form C := alpha*A*conjg( B' ) + beta*C. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (beta->r == 0.f && beta->i == 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L160: */ - } - } else if ((beta->r != 1.f) || (beta->i != 0.f)) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__1.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L170: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - i__3 = j + l * b_dim1; - if ((b[i__3].r != 0.f) || (b[i__3].i != 0.f)) { - r_cnjg(&q__2, &b[j + l * b_dim1]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, - q__1.i = alpha->r * q__2.i + alpha->i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - q__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] - .i + q__2.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; -/* L180: */ - } - } -/* L190: */ - } -/* L200: */ - } - } else { + } else { + if (nota) { -/* Form C := alpha*A*B' + beta*C */ +/* Form C := alpha*A*B' + beta*C */ i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (beta->r == 0.f && beta->i == 
0.f) { + if (*beta == 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; -/* L210: */ + c__[i__ + j * c_dim1] = 0.; +/* L130: */ } - } else if ((beta->r != 1.f) || (beta->i != 0.f)) { + } else if (*beta != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__1.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L220: */ + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L140: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { - i__3 = j + l * b_dim1; - if ((b[i__3].r != 0.f) || (b[i__3].i != 0.f)) { - i__3 = j + l * b_dim1; - q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, - q__1.i = alpha->r * b[i__3].i + alpha->i * b[ - i__3].r; - temp.r = q__1.r, temp.i = q__1.i; + if (b[j + l * b_dim1] != 0.) { + temp = *alpha * b[j + l * b_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - q__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] - .i + q__2.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; -/* L230: */ + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L150: */ } } -/* L240: */ - } -/* L250: */ - } - } - } else if (conja) { - if (conjb) { - -/* Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. 
*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - r_cnjg(&q__4, &b[j + l * b_dim1]); - q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = - q__3.r * q__4.i + q__3.i * q__4.r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L260: */ - } - if (beta->r == 0.f && beta->i == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, - q__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } -/* L270: */ - } -/* L280: */ - } - } else { - -/* Form C := alpha*conjg( A' )*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - i__4 = j + l * b_dim1; - q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, - q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] - .r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L290: */ - } - if (beta->r == 0.f && beta->i == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, - q__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - q__3.r = 
beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } -/* L300: */ - } -/* L310: */ - } - } - } else { - if (conjb) { - -/* Form C := alpha*A'*conjg( B' ) + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - i__4 = l + i__ * a_dim1; - r_cnjg(&q__3, &b[j + l * b_dim1]); - q__2.r = a[i__4].r * q__3.r - a[i__4].i * q__3.i, - q__2.i = a[i__4].r * q__3.i + a[i__4].i * - q__3.r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L320: */ - } - if (beta->r == 0.f && beta->i == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, - q__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } -/* L330: */ +/* L160: */ } -/* L340: */ +/* L170: */ } } else { @@ -959,78 +646,53 @@ L20: for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; + temp = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { - i__4 = l + i__ * a_dim1; - i__5 = j + l * b_dim1; - q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] - .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4] - .i * b[i__5].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L350: */ + temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; +/* L180: */ } - if (beta->r == 0.f && 
beta->i == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; } else { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, - q__2.i = alpha->r * temp.i + alpha->i * - temp.r; - i__4 = i__ + j * c_dim1; - q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] - .i, q__3.i = beta->r * c__[i__4].i + beta->i * - c__[i__4].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; } -/* L360: */ +/* L190: */ } -/* L370: */ +/* L200: */ } } } return 0; -/* End of CGEMM . */ +/* End of DGEMM . */ -} /* cgemm_ */ +} /* dgemm_ */ -/* Subroutine */ int cgemv_(char *trans, integer *m, integer *n, complex * - alpha, complex *a, integer *lda, complex *x, integer *incx, complex * - beta, complex *y, integer *incy) +/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal * + alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); + integer a_dim1, a_offset, i__1, i__2; /* Local variables */ static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static complex temp; + static doublereal temp; static integer lenx, leny; extern logical lsame_(char *, char *); extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj; /* Purpose ======= - CGEMV performs one of the matrix-vector operations - - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or + DGEMV performs one of the matrix-vector operations - y := alpha*conjg( A' )*x + beta*y, + y := alpha*A*x + beta*y, or y := 
alpha*A'*x + beta*y, where alpha and beta are scalars, x and y are vectors and A is an m by n matrix. @@ -1046,7 +708,7 @@ L20: TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. + TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. Unchanged on exit. @@ -1060,11 +722,11 @@ L20: N must be at least zero. Unchanged on exit. - ALPHA - COMPLEX . + ALPHA - DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. Unchanged on exit. - A - COMPLEX array of DIMENSION ( LDA, n ). + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). Before entry, the leading m by n part of the array A must contain the matrix of coefficients. Unchanged on exit. @@ -1075,7 +737,7 @@ L20: max( 1, m ). Unchanged on exit. - X - COMPLEX array of DIMENSION at least + X - DOUBLE PRECISION array of DIMENSION at least ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. @@ -1088,12 +750,12 @@ L20: X. INCX must not be zero. Unchanged on exit. - BETA - COMPLEX . + BETA - DOUBLE PRECISION. On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. Unchanged on exit. - Y - COMPLEX array of DIMENSION at least + Y - DOUBLE PRECISION array of DIMENSION at least ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. @@ -1143,19 +805,16 @@ L20: info = 11; } if (info != 0) { - xerbla_("CGEMV ", &info); + xerbla_("DGEMV ", &info); return 0; } /* Quick return if possible. */ - if (((*m == 0) || (*n == 0)) || (alpha->r == 0.f && alpha->i == 0.f && ( - beta->r == 1.f && beta->i == 0.f))) { + if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { return 0; } - noconj = lsame_(trans, "T"); - /* Set LENX and LENY, the lengths of the vectors x and y, and set up the start points in X and Y. @@ -1186,53 +845,41 @@ L20: First form y := beta*y. */ - if ((beta->r != 1.f) || (beta->i != 0.f)) { + if (*beta != 1.) 
{ if (*incy == 1) { - if (beta->r == 0.f && beta->i == 0.f) { + if (*beta == 0.) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0.f, y[i__2].i = 0.f; + y[i__] = 0.; /* L10: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; + y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; - if (beta->r == 0.f && beta->i == 0.f) { + if (*beta == 0.) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0.f, y[i__2].i = 0.f; + y[iy] = 0.; iy += *incy; /* L30: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; + y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } - if (alpha->r == 0.f && alpha->i == 0.f) { + if (*alpha == 0.) { return 0; } if (lsame_(trans, "N")) { @@ -1243,24 +890,11 @@ L20: if (*incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = jx; - if ((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) { - i__2 = jx; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - temp.r = q__1.r, temp.i = q__1.i; + if (x[jx] != 0.) 
{ + temp = *alpha * x[jx]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + - q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; + y[i__] += temp * a[i__ + j * a_dim1]; /* L50: */ } } @@ -1270,25 +904,12 @@ L20: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = jx; - if ((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) { - i__2 = jx; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - temp.r = q__1.r, temp.i = q__1.i; + if (x[jx] != 0.) { + temp = *alpha * x[jx]; iy = ky; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = iy; - i__4 = iy; - i__5 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + - q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; + y[iy] += temp * a[i__ + j * a_dim1]; iy += *incy; /* L70: */ } @@ -1299,110 +920,56 @@ L20: } } else { -/* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. */ +/* Form y := alpha*A'*x + y. 
*/ jy = ky; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - temp.r = 0.f, temp.i = 0.f; - if (noconj) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__; - q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] - .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3] - .i * x[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; + temp = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; /* L90: */ - } - } else { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, - q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3] - .r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L100: */ - } } - i__2 = jy; - i__3 = jy; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = - alpha->r * temp.i + alpha->i * temp.r; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; + y[jy] += *alpha * temp; jy += *incy; -/* L110: */ +/* L100: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - temp.r = 0.f, temp.i = 0.f; + temp = 0.; ix = kx; - if (noconj) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = ix; - q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] - .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3] - .i * x[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix += *incx; -/* L120: */ - } - } else { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, - q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3] - .r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix += *incx; -/* L130: */ - } + i__2 = *m; + for (i__ = 1; i__ <= i__2; 
++i__) { + temp += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ } - i__2 = jy; - i__3 = jy; - q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = - alpha->r * temp.i + alpha->i * temp.r; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; + y[jy] += *alpha * temp; jy += *incy; -/* L140: */ +/* L120: */ } } } return 0; -/* End of CGEMV . */ +/* End of DGEMV . */ -} /* cgemv_ */ +} /* dgemv_ */ -/* Subroutine */ int cgerc_(integer *m, integer *n, complex *alpha, complex * - x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) +/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); + integer a_dim1, a_offset, i__1, i__2; /* Local variables */ static integer i__, j, ix, jy, kx, info; - static complex temp; + static doublereal temp; extern /* Subroutine */ int xerbla_(char *, integer *); @@ -1410,9 +977,9 @@ L20: Purpose ======= - CGERC performs the rank 1 operation + DGER performs the rank 1 operation - A := alpha*x*conjg( y' ) + A, + A := alpha*x*y' + A, where alpha is a scalar, x is an m element vector, y is an n element vector and A is an m by n matrix. @@ -1430,11 +997,11 @@ L20: N must be at least zero. Unchanged on exit. - ALPHA - COMPLEX . + ALPHA - DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. Unchanged on exit. - X - COMPLEX array of dimension at least + X - DOUBLE PRECISION array of dimension at least ( 1 + ( m - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the m element vector x. @@ -1445,7 +1012,7 @@ L20: X. INCX must not be zero. Unchanged on exit. 
- Y - COMPLEX array of dimension at least + Y - DOUBLE PRECISION array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. @@ -1456,7 +1023,7 @@ L20: Y. INCY must not be zero. Unchanged on exit. - A - COMPLEX array of DIMENSION ( LDA, n ). + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). Before entry, the leading m by n part of the array A must contain the matrix of coefficients. On exit, A is overwritten by the updated matrix. @@ -1501,13 +1068,13 @@ L20: info = 9; } if (info != 0) { - xerbla_("CGERC ", &info); + xerbla_("DGER ", &info); return 0; } /* Quick return if possible. */ - if (((*m == 0) || (*n == 0)) || (alpha->r == 0.f && alpha->i == 0.f)) { + if (*m == 0 || *n == 0 || *alpha == 0.) { return 0; } @@ -1524,21 +1091,11 @@ L20: if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = jy; - if ((y[i__2].r != 0.f) || (y[i__2].i != 0.f)) { - r_cnjg(&q__2, &y[jy]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = - alpha->r * q__2.i + alpha->i * q__2.r; - temp.r = q__1.r, temp.i = q__1.i; + if (y[jy] != 0.) { + temp = *alpha * y[jy]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = i__; - q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = - x[i__5].r * temp.i + x[i__5].i * temp.r; - q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; + a[i__ + j * a_dim1] += x[i__] * temp; /* L10: */ } } @@ -1553,22 +1110,12 @@ L20: } i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = jy; - if ((y[i__2].r != 0.f) || (y[i__2].i != 0.f)) { - r_cnjg(&q__2, &y[jy]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = - alpha->r * q__2.i + alpha->i * q__2.r; - temp.r = q__1.r, temp.i = q__1.i; + if (y[jy] != 0.) 
{ + temp = *alpha * y[jy]; ix = kx; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = ix; - q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = - x[i__5].r * temp.i + x[i__5].i * temp.r; - q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; + a[i__ + j * a_dim1] += x[ix] * temp; ix += *incx; /* L30: */ } @@ -1580,304 +1127,415 @@ L20: return 0; -/* End of CGERC . */ +/* End of DGER . */ -} /* cgerc_ */ +} /* dger_ */ -/* Subroutine */ int cgeru_(integer *m, integer *n, complex *alpha, complex * - x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) +doublereal dnrm2_(integer *n, doublereal *x, integer *incx) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2; + integer i__1, i__2; + doublereal ret_val, d__1; + + /* Builtin functions */ + double sqrt(doublereal); /* Local variables */ - static integer i__, j, ix, jy, kx, info; - static complex temp; - extern /* Subroutine */ int xerbla_(char *, integer *); + static integer ix; + static doublereal ssq, norm, scale, absxi; /* - Purpose - ======= - - CGERU performs the rank 1 operation + DNRM2 returns the euclidean norm of a vector via the function + name, so that - A := alpha*x*y' + A, + DNRM2 := sqrt( x'*x ) - where alpha is a scalar, x is an m element vector, y is an n element - vector and A is an m by n matrix. - Parameters - ========== + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to DLASSQ. + Sven Hammarling, Nag Ltd. +*/ - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. + /* Parameter adjustments */ + --x; - ALPHA - COMPLEX . - On entry, ALPHA specifies the scalar alpha. 
- Unchanged on exit. + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.; + } else if (*n == 1) { + norm = abs(x[1]); + } else { + scale = 0.; + ssq = 1.; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL DLASSQ( N, X, INCX, SCALE, SSQ ) +*/ - X - COMPLEX array of dimension at least - ( 1 + ( m - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the m - element vector x. - Unchanged on exit. + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.) { + absxi = (d__1 = x[ix], abs(d__1)); + if (scale < absxi) { +/* Computing 2nd power */ + d__1 = scale / absxi; + ssq = ssq * (d__1 * d__1) + 1.; + scale = absxi; + } else { +/* Computing 2nd power */ + d__1 = absxi / scale; + ssq += d__1 * d__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. + ret_val = norm; + return ret_val; - Y - COMPLEX array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. +/* End of DNRM2. */ - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. +} /* dnrm2_ */ - A - COMPLEX array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. On exit, A is - overwritten by the updated matrix. +/* Subroutine */ int drot_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy, doublereal *c__, doublereal *s) +{ + /* System generated locals */ + integer i__1; - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. 
+ /* Local variables */ + static integer i__, ix, iy; + static doublereal dtemp; - Level 2 Blas routine. +/* + applies a plane rotation. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; + /* Parameter adjustments */ + --dy; + --dx; /* Function Body */ - info = 0; - if (*m < 0) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("CGERU ", &info); + if (*n <= 0) { return 0; } - -/* Quick return if possible. */ - - if (((*m == 0) || (*n == 0)) || (alpha->r == 0.f && alpha->i == 0.f)) { - return 0; + if (*incx == 1 && *incy == 1) { + goto L20; } /* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. 
+ code for unequal increments or equal increments not equal + to 1 */ - if (*incy > 0) { - jy = 1; - } else { - jy = 1 - (*n - 1) * *incy; + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; } - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jy; - if ((y[i__2].r != 0.f) || (y[i__2].i != 0.f)) { - i__2 = jy; - q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i = - alpha->r * y[i__2].i + alpha->i * y[i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = i__; - q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = - x[i__5].r * temp.i + x[i__5].i * temp.r; - q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = *c__ * dx[ix] + *s * dy[iy]; + dy[iy] = *c__ * dy[iy] - *s * dx[ix]; + dx[ix] = dtemp; + ix += *incx; + iy += *incy; /* L10: */ - } - } - jy += *incy; -/* L20: */ - } - } else { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*m - 1) * *incx; - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jy; - if ((y[i__2].r != 0.f) || (y[i__2].i != 0.f)) { - i__2 = jy; - q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i = - alpha->r * y[i__2].i + alpha->i * y[i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = ix; - q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = - x[i__5].r * temp.i + x[i__5].i * temp.r; - q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - ix += *incx; -/* L30: */ - } - } - jy += *incy; -/* L40: */ - } } - return 0; -/* End of CGERU . 
*/ +/* code for both increments equal to 1 */ -} /* cgeru_ */ +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = *c__ * dx[i__] + *s * dy[i__]; + dy[i__] = *c__ * dy[i__] - *s * dx[i__]; + dx[i__] = dtemp; +/* L30: */ + } + return 0; +} /* drot_ */ -/* Subroutine */ int chemv_(char *uplo, integer *n, complex *alpha, complex * - a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, - integer *incy) +/* Subroutine */ int dscal_(integer *n, doublereal *da, doublereal *dx, + integer *incx) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - real r__1; - complex q__1, q__2, q__3, q__4; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); + integer i__1, i__2; /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static complex temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); + static integer i__, m, mp1, nincx; /* - Purpose - ======= - - CHEMV performs the matrix-vector operation + scales a vector by a constant. + uses unrolled loops for increment equal to one. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ - y := alpha*A*x + beta*y, - where alpha and beta are scalars, x and y are n element vectors and - A is an n by n hermitian matrix. + /* Parameter adjustments */ + --dx; - Parameters - ========== + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: +/* code for increment not equal to 1 */ - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + dx[i__] = *da * dx[i__]; +/* L10: */ + } + return 0; - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. +/* + code for increment equal to 1 - Unchanged on exit. - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. + clean-up loop +*/ - ALPHA - COMPLEX . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__2 = m; + for (i__ = 1; i__ <= i__2; ++i__) { + dx[i__] = *da * dx[i__]; +/* L30: */ + } + if (*n < 5) { + return 0; + } +L40: + mp1 = m + 1; + i__2 = *n; + for (i__ = mp1; i__ <= i__2; i__ += 5) { + dx[i__] = *da * dx[i__]; + dx[i__ + 1] = *da * dx[i__ + 1]; + dx[i__ + 2] = *da * dx[i__ + 2]; + dx[i__ + 3] = *da * dx[i__ + 3]; + dx[i__ + 4] = *da * dx[i__ + 4]; +/* L50: */ + } + return 0; +} /* dscal_ */ - A - COMPLEX array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the hermitian matrix and the strictly - lower triangular part of A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the hermitian matrix and the strictly - upper triangular part of A is not referenced. - Note that the imaginary parts of the diagonal elements need - not be set and are assumed to be zero. - Unchanged on exit. +/* Subroutine */ int dswap_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. 
+ /* Local variables */ + static integer i__, m, ix, iy, mp1; + static doublereal dtemp; - X - COMPLEX array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. +/* + interchanges two vectors. + uses unrolled loops for increments equal one. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ - BETA - COMPLEX . - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - Y - COMPLEX array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. On exit, Y is overwritten by the updated - vector y. + /* Parameter adjustments */ + --dy; + --dx; - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } +/* + code for unequal increments or equal increments not equal + to 1 +*/ - Level 2 Blas routine. 
+ ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[ix]; + dx[ix] = dy[iy]; + dy[iy] = dtemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 3; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = dtemp; +/* L30: */ + } + if (*n < 3) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 3) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = dtemp; + dtemp = dx[i__ + 1]; + dx[i__ + 1] = dy[i__ + 1]; + dy[i__ + 1] = dtemp; + dtemp = dx[i__ + 2]; + dx[i__ + 2] = dy[i__ + 2]; + dy[i__ + 2] = dtemp; +/* L50: */ + } + return 0; +} /* dswap_ */ + +/* Subroutine */ int dsymv_(char *uplo, integer *n, doublereal *alpha, + doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal + *beta, doublereal *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublereal temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DSYMV performs the matrix-vector operation + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n symmetric matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. 
+ On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. On exit, Y is overwritten by the updated + vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. -- Written on 22-October-1986. Jack Dongarra, Argonne National Lab. @@ -1910,14 +1568,13 @@ L20: info = 10; } if (info != 0) { - xerbla_("CHEMV ", &info); + xerbla_("DSYMV ", &info); return 0; } /* Quick return if possible. 
*/ - if ((*n == 0) || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && - beta->i == 0.f))) { + if (*n == 0 || *alpha == 0. && *beta == 1.) { return 0; } @@ -1942,53 +1599,41 @@ L20: First form y := beta*y. */ - if ((beta->r != 1.f) || (beta->i != 0.f)) { + if (*beta != 1.) { if (*incy == 1) { - if (beta->r == 0.f && beta->i == 0.f) { + if (*beta == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - y[i__2].r = 0.f, y[i__2].i = 0.f; + y[i__] = 0.; /* L10: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; + y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; - if (beta->r == 0.f && beta->i == 0.f) { + if (*beta == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - y[i__2].r = 0.f, y[i__2].i = 0.f; + y[iy] = 0.; iy += *incy; /* L30: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = iy; - i__3 = iy; - q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, - q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] - .r; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; + y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } - if (alpha->r == 0.f && alpha->i == 0.f) { + if (*alpha == 0.) 
{ return 0; } if (lsame_(uplo, "U")) { @@ -1998,39 +1643,15 @@ L20: if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; + temp1 = *alpha * x[j]; + temp2 = 0.; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = - q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; /* L50: */ } - i__2 = j; - i__3 = j; - i__4 = j + j * a_dim1; - r__1 = a[i__4].r; - q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; - q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; - q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; + y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; /* L60: */ } } else { @@ -2038,43 +1659,19 @@ L20: jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = jx; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; + temp1 = *alpha * x[jx]; + temp2 = 0.; ix = kx; iy = ky; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = iy; - i__4 = iy; - i__5 = i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - q__2.i 
= temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = - q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; ix += *incx; iy += *incy; /* L70: */ } - i__2 = jy; - i__3 = jy; - i__4 = j + j * a_dim1; - r__1 = a[i__4].r; - q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; - q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; - q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; + y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; jx += *incx; jy += *incy; /* L80: */ @@ -2087,42 +1684,16 @@ L20: if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - i__2 = j; - i__3 = j; - i__4 = j + j * a_dim1; - r__1 = a[i__4].r; - q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; + temp1 = *alpha * x[j]; + temp2 = 0.; + y[j] += temp1 * a[j + j * a_dim1]; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - 
q__3.i * x[i__3].i, q__2.i = - q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; /* L90: */ } - i__2 = j; - i__3 = j; - q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; + y[j] += *alpha * temp2; /* L100: */ } } else { @@ -2130,46 +1701,20 @@ L20: jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = jx; - q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = - alpha->r * x[i__2].i + alpha->i * x[i__2].r; - temp1.r = q__1.r, temp1.i = q__1.i; - temp2.r = 0.f, temp2.i = 0.f; - i__2 = jy; - i__3 = jy; - i__4 = j + j * a_dim1; - r__1 = a[i__4].r; - q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; - q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; + temp1 = *alpha * x[jx]; + temp2 = 0.; + y[jy] += temp1 * a[j + j * a_dim1]; ix = jx; iy = jy; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { ix += *incx; iy += *incy; - i__3 = iy; - i__4 = iy; - i__5 = i__ + j * a_dim1; - q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, - q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] - .r; - q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; - y[i__3].r = q__1.r, y[i__3].i = q__1.i; - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = - q__3.r * x[i__3].i + q__3.i * x[i__3].r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; /* L110: */ } - i__2 = jy; - i__3 = jy; - q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = - alpha->r * temp2.i + alpha->i * temp2.r; - q__1.r = y[i__3].r + q__2.r, q__1.i = 
y[i__3].i + q__2.i; - y[i__2].r = q__1.r, y[i__2].i = q__1.i; + y[jy] += *alpha * temp2; jx += *incx; jy += *incy; /* L120: */ @@ -2179,24 +1724,20 @@ L20: return 0; -/* End of CHEMV . */ +/* End of DSYMV . */ -} /* chemv_ */ +} /* dsymv_ */ -/* Subroutine */ int cher2_(char *uplo, integer *n, complex *alpha, complex * - x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) +/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; - real r__1; - complex q__1, q__2, q__3, q__4; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); + integer a_dim1, a_offset, i__1, i__2; /* Local variables */ static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static complex temp1, temp2; + static doublereal temp1, temp2; extern logical lsame_(char *, char *); extern /* Subroutine */ int xerbla_(char *, integer *); @@ -2205,12 +1746,12 @@ L20: Purpose ======= - CHER2 performs the hermitian rank 2 operation + DSYR2 performs the symmetric rank 2 operation - A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, + A := alpha*x*y' + alpha*y*x' + A, where alpha is a scalar, x and y are n element vectors and A is an n - by n hermitian matrix. + by n symmetric matrix. Parameters ========== @@ -2233,11 +1774,11 @@ L20: N must be at least zero. Unchanged on exit. - ALPHA - COMPLEX . + ALPHA - DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. Unchanged on exit. - X - COMPLEX array of dimension at least + X - DOUBLE PRECISION array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. @@ -2248,7 +1789,7 @@ L20: X. INCX must not be zero. Unchanged on exit. 
- Y - COMPLEX array of dimension at least + Y - DOUBLE PRECISION array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. @@ -2259,22 +1800,19 @@ L20: Y. INCY must not be zero. Unchanged on exit. - A - COMPLEX array of DIMENSION ( LDA, n ). + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array A must contain the upper - triangular part of the hermitian matrix and the strictly + triangular part of the symmetric matrix and the strictly lower triangular part of A is not referenced. On exit, the upper triangular part of the array A is overwritten by the upper triangular part of the updated matrix. Before entry with UPLO = 'L' or 'l', the leading n by n lower triangular part of the array A must contain the lower - triangular part of the hermitian matrix and the strictly + triangular part of the symmetric matrix and the strictly upper triangular part of A is not referenced. On exit, the lower triangular part of the array A is overwritten by the lower triangular part of the updated matrix. - Note that the imaginary parts of the diagonal elements need - not be set, they are assumed to be zero, and on exit they - are set to zero. LDA - INTEGER. On entry, LDA specifies the first dimension of A as declared @@ -2316,13 +1854,13 @@ L20: info = 9; } if (info != 0) { - xerbla_("CHER2 ", &info); + xerbla_("DSYR2 ", &info); return 0; } /* Quick return if possible. */ - if ((*n == 0) || (alpha->r == 0.f && alpha->i == 0.f)) { + if (*n == 0 || *alpha == 0.) { return 0; } @@ -2331,7 +1869,7 @@ L20: unity. 
*/ - if ((*incx != 1) || (*incy != 1)) { + if (*incx != 1 || *incy != 1) { if (*incx > 0) { kx = 1; } else { @@ -2359,116 +1897,34 @@ L20: if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j; - i__3 = j; - if (((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) || (((y[i__3] - .r != 0.f) || (y[i__3].i != 0.f)))) { - r_cnjg(&q__2, &y[j]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = - alpha->r * q__2.i + alpha->i * q__2.r; - temp1.r = q__1.r, temp1.i = q__1.i; + if (x[j] != 0. || y[j] != 0.) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; i__2 = j; - q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - r_cnjg(&q__1, &q__2); - temp2.r = q__1.r, temp2.i = q__1.i; - i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = i__; - q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, - q__3.i = x[i__5].r * temp1.i + x[i__5].i * - temp1.r; - q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + - q__3.i; - i__6 = i__; - q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, - q__4.i = y[i__6].r * temp2.i + y[i__6].i * - temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; /* L10: */ } - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - i__4 = j; - q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, - q__2.i = x[i__4].r * temp1.i + x[i__4].i * - temp1.r; - i__5 = j; - q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, - q__3.i = y[i__5].r * temp2.i + y[i__5].i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - r__1 = a[i__3].r + q__1.r; - a[i__2].r = r__1, a[i__2].i = 0.f; - } else { - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = jx; - i__3 = 
jy; - if (((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) || (((y[i__3] - .r != 0.f) || (y[i__3].i != 0.f)))) { - r_cnjg(&q__2, &y[jy]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = - alpha->r * q__2.i + alpha->i * q__2.r; - temp1.r = q__1.r, temp1.i = q__1.i; - i__2 = jx; - q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - r_cnjg(&q__1, &q__2); - temp2.r = q__1.r, temp2.i = q__1.i; + if (x[jx] != 0. || y[jy] != 0.) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; ix = kx; iy = ky; - i__2 = j - 1; + i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = ix; - q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, - q__3.i = x[i__5].r * temp1.i + x[i__5].i * - temp1.r; - q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + - q__3.i; - i__6 = iy; - q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, - q__4.i = y[i__6].r * temp2.i + y[i__6].i * - temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; ix += *incx; iy += *incy; /* L30: */ } - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - i__4 = jx; - q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, - q__2.i = x[i__4].r * temp1.i + x[i__4].i * - temp1.r; - i__5 = jy; - q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, - q__3.i = y[i__5].r * temp2.i + y[i__5].i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - r__1 = a[i__3].r + q__1.r; - a[i__2].r = r__1, a[i__2].i = 0.f; - } else { - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; } jx += *incx; jy += *incy; @@ -2482,116 +1938,34 @@ L20: if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j; - i__3 = j; - if (((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) || (((y[i__3] - .r != 0.f) || (y[i__3].i != 0.f)))) { - 
r_cnjg(&q__2, &y[j]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = - alpha->r * q__2.i + alpha->i * q__2.r; - temp1.r = q__1.r, temp1.i = q__1.i; - i__2 = j; - q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, - q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - r_cnjg(&q__1, &q__2); - temp2.r = q__1.r, temp2.i = q__1.i; - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - i__4 = j; - q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, - q__2.i = x[i__4].r * temp1.i + x[i__4].i * - temp1.r; - i__5 = j; - q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, - q__3.i = y[i__5].r * temp2.i + y[i__5].i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - r__1 = a[i__3].r + q__1.r; - a[i__2].r = r__1, a[i__2].i = 0.f; + if (x[j] != 0. || y[j] != 0.) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = i__; - q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, - q__3.i = x[i__5].r * temp1.i + x[i__5].i * - temp1.r; - q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + - q__3.i; - i__6 = i__; - q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, - q__4.i = y[i__6].r * temp2.i + y[i__6].i * - temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; /* L50: */ } - } else { - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = jx; - i__3 = jy; - if (((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) || (((y[i__3] - .r != 0.f) || (y[i__3].i != 0.f)))) { - r_cnjg(&q__2, &y[jy]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = - alpha->r * q__2.i + alpha->i * q__2.r; - temp1.r = q__1.r, temp1.i = q__1.i; - i__2 = jx; - q__2.r = alpha->r 
* x[i__2].r - alpha->i * x[i__2].i, - q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] - .r; - r_cnjg(&q__1, &q__2); - temp2.r = q__1.r, temp2.i = q__1.i; - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - i__4 = jx; - q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, - q__2.i = x[i__4].r * temp1.i + x[i__4].i * - temp1.r; - i__5 = jy; - q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, - q__3.i = y[i__5].r * temp2.i + y[i__5].i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - r__1 = a[i__3].r + q__1.r; - a[i__2].r = r__1, a[i__2].i = 0.f; + if (x[jx] != 0. || y[jy] != 0.) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; ix = jx; iy = jy; i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; ix += *incx; iy += *incy; - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - i__5 = ix; - q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, - q__3.i = x[i__5].r * temp1.i + x[i__5].i * - temp1.r; - q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + - q__3.i; - i__6 = iy; - q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, - q__4.i = y[i__6].r * temp2.i + y[i__6].i * - temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; /* L70: */ } - } else { - i__2 = j + j * a_dim1; - i__3 = j + j * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; } jx += *incx; jy += *incy; @@ -2602,26 +1976,21 @@ L20: return 0; -/* End of CHER2 . */ +/* End of DSYR2 . 
*/ -} /* cher2_ */ +} /* dsyr2_ */ -/* Subroutine */ int cher2k_(char *uplo, char *trans, integer *n, integer *k, - complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, - real *beta, complex *c__, integer *ldc) +/* Subroutine */ int dsyr2k_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *beta, doublereal *c__, integer *ldc) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3, i__4, i__5, i__6, i__7; - real r__1; - complex q__1, q__2, q__3, q__4, q__5, q__6; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); + i__3; /* Local variables */ static integer i__, j, l, info; - static complex temp1, temp2; + static doublereal temp1, temp2; extern logical lsame_(char *, char *); static integer nrowa; static logical upper; @@ -2632,17 +2001,17 @@ L20: Purpose ======= - CHER2K performs one of the hermitian rank 2k operations + DSYR2K performs one of the symmetric rank 2k operations - C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, + C := alpha*A*B' + alpha*B*A' + beta*C, or - C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, + C := alpha*A'*B + alpha*B'*A + beta*C, - where alpha and beta are scalars with beta real, C is an n by n - hermitian matrix and A and B are n by k matrices in the first case - and k by n matrices in the second case. + where alpha and beta are scalars, C is an n by n symmetric matrix + and A and B are n by k matrices in the first case and k by n + matrices in the second case. Parameters ========== @@ -2664,13 +2033,14 @@ L20: On entry, TRANS specifies the operation to be performed as follows: - TRANS = 'N' or 'n' C := alpha*A*conjg( B' ) + - conjg( alpha )*B*conjg( A' ) + - beta*C. + TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + + beta*C. - TRANS = 'C' or 'c' C := alpha*conjg( A' )*B + - conjg( alpha )*conjg( B' )*A + - beta*C. 
+ TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + + beta*C. Unchanged on exit. @@ -2682,15 +2052,15 @@ L20: K - INTEGER. On entry with TRANS = 'N' or 'n', K specifies the number of columns of the matrices A and B, and on entry with - TRANS = 'C' or 'c', K specifies the number of rows of the - matrices A and B. K must be at least zero. + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrices A and B. K must be at least zero. Unchanged on exit. - ALPHA - COMPLEX . + ALPHA - DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. Unchanged on exit. - A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is + A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is k when TRANS = 'N' or 'n', and is n otherwise. Before entry with TRANS = 'N' or 'n', the leading n by k part of the array A must contain the matrix A, otherwise @@ -2705,7 +2075,7 @@ L20: be at least max( 1, k ). Unchanged on exit. - B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is + B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is k when TRANS = 'N' or 'n', and is n otherwise. Before entry with TRANS = 'N' or 'n', the leading n by k part of the array B must contain the matrix B, otherwise @@ -2720,26 +2090,23 @@ L20: be at least max( 1, k ). Unchanged on exit. - BETA - REAL . + BETA - DOUBLE PRECISION. On entry, BETA specifies the scalar beta. Unchanged on exit. - C - COMPLEX array of DIMENSION ( LDC, n ). + C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array C must contain the upper - triangular part of the hermitian matrix and the strictly + triangular part of the symmetric matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. 
Before entry with UPLO = 'L' or 'l', the leading n by n lower triangular part of the array C must contain the lower - triangular part of the hermitian matrix and the strictly + triangular part of the symmetric matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. - Note that the imaginary parts of the diagonal elements need - not be set, they are assumed to be zero, and on exit they - are set to zero. LDC - INTEGER. On entry, LDC specifies the first dimension of C as declared @@ -2750,15 +2117,13 @@ L20: Level 3 Blas routine. + -- Written on 8-February-1989. Jack Dongarra, Argonne National Laboratory. Iain Duff, AERE Harwell. Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. - -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. - Ed Anderson, Cray Research Inc. - Test the input parameters. */ @@ -2786,7 +2151,7 @@ L20: if (! upper && ! lsame_(uplo, "L")) { info = 1; } else if (! lsame_(trans, "N") && ! lsame_(trans, - "C")) { + "T") && ! lsame_(trans, "C")) { info = 2; } else if (*n < 0) { info = 3; @@ -2800,28 +2165,26 @@ L20: info = 12; } if (info != 0) { - xerbla_("CHER2K", &info); + xerbla_("DSYR2K", &info); return 0; } /* Quick return if possible. */ - if ((*n == 0) || (((alpha->r == 0.f && alpha->i == 0.f) || (*k == 0)) && * - beta == 1.f)) { + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { return 0; } /* And when alpha.eq.zero. */ - if (alpha->r == 0.f && alpha->i == 0.f) { + if (*alpha == 0.) { if (upper) { - if (*beta == 0.f) { + if (*beta == 0.) 
{ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; + c__[i__ + j * c_dim1] = 0.; /* L10: */ } /* L20: */ @@ -2829,30 +2192,21 @@ L20: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; + i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L30: */ } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; /* L40: */ } } } else { - if (*beta == 0.f) { + if (*beta == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; + c__[i__ + j * c_dim1] = 0.; /* L50: */ } /* L60: */ @@ -2860,17 +2214,9 @@ L20: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L70: */ } /* L80: */ @@ -2884,90 +2230,36 @@ L20: if (lsame_(trans, "N")) { -/* - Form C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + - C. -*/ +/* Form C := alpha*A*B' + alpha*B*A' + C. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { + if (*beta == 0.) 
{ i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; + c__[i__ + j * c_dim1] = 0.; /* L90: */ } - } else if (*beta != 1.f) { - i__2 = j - 1; + } else if (*beta != 1.) { + i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L100: */ } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; } i__2 = *k; for (l = 1; l <= i__2; ++l) { - i__3 = j + l * a_dim1; - i__4 = j + l * b_dim1; - if (((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) || (((b[ - i__4].r != 0.f) || (b[i__4].i != 0.f)))) { - r_cnjg(&q__2, &b[j + l * b_dim1]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, - q__1.i = alpha->r * q__2.i + alpha->i * - q__2.r; - temp1.r = q__1.r, temp1.i = q__1.i; - i__3 = j + l * a_dim1; - q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, - q__2.i = alpha->r * a[i__3].i + alpha->i * a[ - i__3].r; - r_cnjg(&q__1, &q__2); - temp2.r = q__1.r, temp2.i = q__1.i; - i__3 = j - 1; + if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) 
{ + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = j; for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__3.r = a[i__6].r * temp1.r - a[i__6].i * - temp1.i, q__3.i = a[i__6].r * temp1.i + a[ - i__6].i * temp1.r; - q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5] - .i + q__3.i; - i__7 = i__ + l * b_dim1; - q__4.r = b[i__7].r * temp2.r - b[i__7].i * - temp2.i, q__4.i = b[i__7].r * temp2.i + b[ - i__7].i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + - q__4.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; /* L110: */ } - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - i__5 = j + l * a_dim1; - q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, - q__2.i = a[i__5].r * temp1.i + a[i__5].i * - temp1.r; - i__6 = j + l * b_dim1; - q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, - q__3.i = b[i__6].r * temp2.i + b[i__6].i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - r__1 = c__[i__4].r + q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; } /* L120: */ } @@ -2976,82 +2268,31 @@ L20: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { + if (*beta == 0.) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; + c__[i__ + j * c_dim1] = 0.; /* L140: */ } - } else if (*beta != 1.f) { + } else if (*beta != 1.) 
{ i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L150: */ } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; } i__2 = *k; for (l = 1; l <= i__2; ++l) { - i__3 = j + l * a_dim1; - i__4 = j + l * b_dim1; - if (((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) || (((b[ - i__4].r != 0.f) || (b[i__4].i != 0.f)))) { - r_cnjg(&q__2, &b[j + l * b_dim1]); - q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, - q__1.i = alpha->r * q__2.i + alpha->i * - q__2.r; - temp1.r = q__1.r, temp1.i = q__1.i; - i__3 = j + l * a_dim1; - q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, - q__2.i = alpha->r * a[i__3].i + alpha->i * a[ - i__3].r; - r_cnjg(&q__1, &q__2); - temp2.r = q__1.r, temp2.i = q__1.i; + if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) 
{ + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; i__3 = *n; - for (i__ = j + 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__3.r = a[i__6].r * temp1.r - a[i__6].i * - temp1.i, q__3.i = a[i__6].r * temp1.i + a[ - i__6].i * temp1.r; - q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5] - .i + q__3.i; - i__7 = i__ + l * b_dim1; - q__4.r = b[i__7].r * temp2.r - b[i__7].i * - temp2.i, q__4.i = b[i__7].r * temp2.i + b[ - i__7].i * temp2.r; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + - q__4.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; /* L160: */ } - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - i__5 = j + l * a_dim1; - q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, - q__2.i = a[i__5].r * temp1.i + a[i__5].i * - temp1.r; - i__6 = j + l * b_dim1; - q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, - q__3.i = b[i__6].r * temp2.i + b[i__6].i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - r__1 = c__[i__4].r + q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; } /* L170: */ } @@ -3060,96 +2301,27 @@ L20: } } else { -/* - Form C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + - C. -*/ +/* Form C := alpha*A'*B + alpha*B'*A + C. 
*/ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - temp1.r = 0.f, temp1.i = 0.f; - temp2.r = 0.f, temp2.i = 0.f; + temp1 = 0.; + temp2 = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - i__4 = l + j * b_dim1; - q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, - q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] - .r; - q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i; - temp1.r = q__1.r, temp1.i = q__1.i; - r_cnjg(&q__3, &b[l + i__ * b_dim1]); - i__4 = l + j * a_dim1; - q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, - q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] - .r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; /* L190: */ } - if (i__ == j) { - if (*beta == 0.f) { - i__3 = j + j * c_dim1; - q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - r_cnjg(&q__4, alpha); - q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, - q__3.i = q__4.r * temp2.i + q__4.i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - r__1 = q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; - } else { - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - r_cnjg(&q__4, alpha); - q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, - q__3.i = q__4.r * temp2.i + q__4.i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - r__1 = *beta * c__[i__4].r + q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; - } + if (*beta == 0.) 
{ + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; } else { - if (*beta == 0.f) { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - r_cnjg(&q__4, alpha); - q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, - q__3.i = q__4.r * temp2.i + q__4.i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__3.r = *beta * c__[i__4].r, q__3.i = *beta * - c__[i__4].i; - q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__4.i = alpha->r * temp1.i + alpha->i * - temp1.r; - q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + - q__4.i; - r_cnjg(&q__6, alpha); - q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, - q__5.i = q__6.r * temp2.i + q__6.i * - temp2.r; - q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + - q__5.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; } /* L200: */ } @@ -3160,86 +2332,20 @@ L20: for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { - temp1.r = 0.f, temp1.i = 0.f; - temp2.r = 0.f, temp2.i = 0.f; + temp1 = 0.; + temp2 = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - i__4 = l + j * b_dim1; - q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, - q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] - .r; - q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i; - temp1.r = q__1.r, temp1.i = q__1.i; - r_cnjg(&q__3, &b[l + i__ * b_dim1]); - i__4 = l + j * a_dim1; - q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, - q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] - .r; - q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; - temp2.r = q__1.r, temp2.i = q__1.i; + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; /* L220: */ } - if (i__ == j) { - if (*beta == 
0.f) { - i__3 = j + j * c_dim1; - q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - r_cnjg(&q__4, alpha); - q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, - q__3.i = q__4.r * temp2.i + q__4.i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - r__1 = q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; - } else { - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - r_cnjg(&q__4, alpha); - q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, - q__3.i = q__4.r * temp2.i + q__4.i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - r__1 = *beta * c__[i__4].r + q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; - } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; } else { - if (*beta == 0.f) { - i__3 = i__ + j * c_dim1; - q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__2.i = alpha->r * temp1.i + alpha->i * - temp1.r; - r_cnjg(&q__4, alpha); - q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, - q__3.i = q__4.r * temp2.i + q__4.i * - temp2.r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } else { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__3.r = *beta * c__[i__4].r, q__3.i = *beta * - c__[i__4].i; - q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, - q__4.i = alpha->r * temp1.i + alpha->i * - temp1.r; - q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + - q__4.i; - r_cnjg(&q__6, alpha); - q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, - q__5.i = q__6.r * temp2.i + q__6.i * - temp2.r; - q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + - q__5.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; - } + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; } /* L230: */ } @@ -3250,29 +2356,22 @@ L20: return 0; -/* End of CHER2K. */ +/* End of DSYR2K. 
*/ -} /* cher2k_ */ +} /* dsyr2k_ */ -/* Subroutine */ int cherk_(char *uplo, char *trans, integer *n, integer *k, - real *alpha, complex *a, integer *lda, real *beta, complex *c__, - integer *ldc) +/* Subroutine */ int dsyrk_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, + doublereal *c__, integer *ldc) { /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, - i__6; - real r__1; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, j, l, info; - static complex temp; + static doublereal temp; extern logical lsame_(char *, char *); static integer nrowa; - static real rtemp; static logical upper; extern /* Subroutine */ int xerbla_(char *, integer *); @@ -3281,17 +2380,17 @@ L20: Purpose ======= - CHERK performs one of the hermitian rank k operations + DSYRK performs one of the symmetric rank k operations - C := alpha*A*conjg( A' ) + beta*C, + C := alpha*A*A' + beta*C, or - C := alpha*conjg( A' )*A + beta*C, + C := alpha*A'*A + beta*C, - where alpha and beta are real scalars, C is an n by n hermitian - matrix and A is an n by k matrix in the first case and a k by n - matrix in the second case. + where alpha and beta are scalars, C is an n by n symmetric matrix + and A is an n by k matrix in the first case and a k by n matrix + in the second case. Parameters ========== @@ -3313,9 +2412,11 @@ L20: On entry, TRANS specifies the operation to be performed as follows: - TRANS = 'N' or 'n' C := alpha*A*conjg( A' ) + beta*C. + TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. - TRANS = 'C' or 'c' C := alpha*conjg( A' )*A + beta*C. + TRANS = 'T' or 't' C := alpha*A'*A + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. Unchanged on exit. @@ -3327,15 +2428,15 @@ L20: K - INTEGER. 
On entry with TRANS = 'N' or 'n', K specifies the number of columns of the matrix A, and on entry with - TRANS = 'C' or 'c', K specifies the number of rows of the - matrix A. K must be at least zero. + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrix A. K must be at least zero. Unchanged on exit. - ALPHA - REAL . + ALPHA - DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. Unchanged on exit. - A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is + A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is k when TRANS = 'N' or 'n', and is n otherwise. Before entry with TRANS = 'N' or 'n', the leading n by k part of the array A must contain the matrix A, otherwise @@ -3350,26 +2451,23 @@ L20: be at least max( 1, k ). Unchanged on exit. - BETA - REAL . + BETA - DOUBLE PRECISION. On entry, BETA specifies the scalar beta. Unchanged on exit. - C - COMPLEX array of DIMENSION ( LDC, n ). + C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array C must contain the upper - triangular part of the hermitian matrix and the strictly + triangular part of the symmetric matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with UPLO = 'L' or 'l', the leading n by n lower triangular part of the array C must contain the lower - triangular part of the hermitian matrix and the strictly + triangular part of the symmetric matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. - Note that the imaginary parts of the diagonal elements need - not be set, they are assumed to be zero, and on exit they - are set to zero. LDC - INTEGER. 
On entry, LDC specifies the first dimension of C as declared @@ -3386,9 +2484,6 @@ L20: Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. - -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. - Ed Anderson, Cray Research Inc. - Test the input parameters. */ @@ -3413,7 +2508,7 @@ L20: if (! upper && ! lsame_(uplo, "L")) { info = 1; } else if (! lsame_(trans, "N") && ! lsame_(trans, - "C")) { + "T") && ! lsame_(trans, "C")) { info = 2; } else if (*n < 0) { info = 3; @@ -3425,27 +2520,26 @@ L20: info = 10; } if (info != 0) { - xerbla_("CHERK ", &info); + xerbla_("DSYRK ", &info); return 0; } /* Quick return if possible. */ - if ((*n == 0) || (((*alpha == 0.f) || (*k == 0)) && *beta == 1.f)) { + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { return 0; } /* And when alpha.eq.zero. */ - if (*alpha == 0.f) { + if (*alpha == 0.) { if (upper) { - if (*beta == 0.f) { + if (*beta == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; + c__[i__ + j * c_dim1] = 0.; /* L10: */ } /* L20: */ @@ -3453,30 +2547,21 @@ L20: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; + i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L30: */ } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; /* L40: */ } } } else { - if (*beta == 0.f) { + if (*beta == 0.) 
{ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; + c__[i__ + j * c_dim1] = 0.; /* L50: */ } /* L60: */ @@ -3484,17 +2569,9 @@ L20: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L70: */ } /* L80: */ @@ -3508,66 +2585,34 @@ L20: if (lsame_(trans, "N")) { -/* Form C := alpha*A*conjg( A' ) + beta*C. */ +/* Form C := alpha*A*A' + beta*C. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { + if (*beta == 0.) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; + c__[i__ + j * c_dim1] = 0.; /* L90: */ } - } else if (*beta != 1.f) { - i__2 = j - 1; + } else if (*beta != 1.) 
{ + i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L100: */ } - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; } i__2 = *k; for (l = 1; l <= i__2; ++l) { - i__3 = j + l * a_dim1; - if ((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) { - r_cnjg(&q__2, &a[j + l * a_dim1]); - q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - i__3 = j - 1; + if (a[j + l * a_dim1] != 0.) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = j; for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - q__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] - .i + q__2.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; /* L110: */ } - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - i__5 = i__ + l * a_dim1; - q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__1.i = temp.r * a[i__5].i + temp.i * a[i__5] - .r; - r__1 = c__[i__4].r + q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; } /* L120: */ } @@ -3576,59 +2621,27 @@ L20: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { + if (*beta == 0.) 
{ i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - c__[i__3].r = 0.f, c__[i__3].i = 0.f; + c__[i__ + j * c_dim1] = 0.; /* L140: */ } - } else if (*beta != 1.f) { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else if (*beta != 1.) { i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ - i__4].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; /* L150: */ } - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; } i__2 = *k; for (l = 1; l <= i__2; ++l) { - i__3 = j + l * a_dim1; - if ((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) { - r_cnjg(&q__2, &a[j + l * a_dim1]); - q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - i__3 = j + j * c_dim1; - i__4 = j + j * c_dim1; - i__5 = j + l * a_dim1; - q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__1.i = temp.r * a[i__5].i + temp.i * a[i__5] - .r; - r__1 = c__[i__4].r + q__1.r; - c__[i__3].r = r__1, c__[i__3].i = 0.f; + if (a[j + l * a_dim1] != 0.) { + temp = *alpha * a[j + l * a_dim1]; i__3 = *n; - for (i__ = j + 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * c_dim1; - i__6 = i__ + l * a_dim1; - q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, - q__2.i = temp.r * a[i__6].i + temp.i * a[ - i__6].r; - q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] - .i + q__2.i; - c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; /* L160: */ } } @@ -3639,361 +2652,89 @@ L20: } } else { -/* Form C := alpha*conjg( A' )*A + beta*C. */ +/* Form C := alpha*A'*A + beta*C. 
*/ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; + i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; + temp = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - i__4 = l + j * a_dim1; - q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, - q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] - .r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; /* L190: */ } - if (*beta == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; } else { - i__3 = i__ + j * c_dim1; - q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i; - i__4 = i__ + j * c_dim1; - q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[ - i__4].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; } /* L200: */ } - rtemp = 0.f; - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - r_cnjg(&q__3, &a[l + j * a_dim1]); - i__3 = l + j * a_dim1; - q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i = - q__3.r * a[i__3].i + q__3.i * a[i__3].r; - q__1.r = rtemp + q__2.r, q__1.i = q__2.i; - rtemp = q__1.r; /* L210: */ - } - if (*beta == 0.f) { - i__2 = j + j * c_dim1; - r__1 = *alpha * rtemp; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *alpha * rtemp + *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } -/* L220: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - rtemp = 0.f; - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - r_cnjg(&q__3, &a[l + j * a_dim1]); - i__3 = l + j * a_dim1; - q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i = - q__3.r * a[i__3].i + q__3.i * a[i__3].r; - q__1.r = rtemp + 
q__2.r, q__1.i = q__2.i; - rtemp = q__1.r; -/* L230: */ - } - if (*beta == 0.f) { - i__2 = j + j * c_dim1; - r__1 = *alpha * rtemp; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } else { - i__2 = j + j * c_dim1; - i__3 = j + j * c_dim1; - r__1 = *alpha * rtemp + *beta * c__[i__3].r; - c__[i__2].r = r__1, c__[i__2].i = 0.f; - } i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - temp.r = 0.f, temp.i = 0.f; + for (i__ = j; i__ <= i__2; ++i__) { + temp = 0.; i__3 = *k; for (l = 1; l <= i__3; ++l) { - r_cnjg(&q__3, &a[l + i__ * a_dim1]); - i__4 = l + j * a_dim1; - q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, - q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] - .r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L240: */ + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L220: */ } - if (*beta == 0.f) { - i__3 = i__ + j * c_dim1; - q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; } else { - i__3 = i__ + j * c_dim1; - q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i; - i__4 = i__ + j * c_dim1; - q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[ - i__4].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; } -/* L250: */ +/* L230: */ } -/* L260: */ +/* L240: */ } } } return 0; -/* End of CHERK . */ - -} /* cherk_ */ - -/* Subroutine */ int cscal_(integer *n, complex *ca, complex *cx, integer * - incx) -{ - /* System generated locals */ - integer i__1, i__2, i__3, i__4; - complex q__1; - - /* Local variables */ - static integer i__, nincx; - - -/* - scales a vector by a constant. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. 
- modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cx; - - /* Function Body */ - if ((*n <= 0) || (*incx <= 0)) { - return 0; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - nincx = *n * *incx; - i__1 = nincx; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - i__3 = i__; - i__4 = i__; - q__1.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__1.i = ca->r * cx[ - i__4].i + ca->i * cx[i__4].r; - cx[i__3].r = q__1.r, cx[i__3].i = q__1.i; -/* L10: */ - } - return 0; - -/* code for increment equal to 1 */ - -L20: - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__1 = i__; - i__3 = i__; - q__1.r = ca->r * cx[i__3].r - ca->i * cx[i__3].i, q__1.i = ca->r * cx[ - i__3].i + ca->i * cx[i__3].r; - cx[i__1].r = q__1.r, cx[i__1].i = q__1.i; -/* L30: */ - } - return 0; -} /* cscal_ */ - -/* Subroutine */ int csscal_(integer *n, real *sa, complex *cx, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2, i__3, i__4; - real r__1, r__2; - complex q__1; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer i__, nincx; - - -/* - scales a complex vector by a real constant. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cx; - - /* Function Body */ - if ((*n <= 0) || (*incx <= 0)) { - return 0; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - nincx = *n * *incx; - i__1 = nincx; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { - i__3 = i__; - i__4 = i__; - r__1 = *sa * cx[i__4].r; - r__2 = *sa * r_imag(&cx[i__]); - q__1.r = r__1, q__1.i = r__2; - cx[i__3].r = q__1.r, cx[i__3].i = q__1.i; -/* L10: */ - } - return 0; - -/* code for increment equal to 1 */ - -L20: - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__1 = i__; - i__3 = i__; - r__1 = *sa * cx[i__3].r; - r__2 = *sa * r_imag(&cx[i__]); - q__1.r = r__1, q__1.i = r__2; - cx[i__1].r = q__1.r, cx[i__1].i = q__1.i; -/* L30: */ - } - return 0; -} /* csscal_ */ - -/* Subroutine */ int cswap_(integer *n, complex *cx, integer *incx, complex * - cy, integer *incy) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - - /* Local variables */ - static integer i__, ix, iy; - static complex ctemp; - - -/* - interchanges two vectors. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ix; - ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i; - i__2 = ix; - i__3 = iy; - cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i; - i__2 = iy; - cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; +/* End of DSYRK . 
*/ -/* code for both increments equal to 1 */ -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i; - i__2 = i__; - i__3 = i__; - cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i; - i__2 = i__; - cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i; -/* L30: */ - } - return 0; -} /* cswap_ */ +} /* dsyrk_ */ -/* Subroutine */ int ctrmm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, complex *alpha, complex *a, integer *lda, - complex *b, integer *ldb) +/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb) { /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, - i__6; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, j, k, info; - static complex temp; - extern logical lsame_(char *, char *); + static doublereal temp; static logical lside; + extern logical lsame_(char *, char *); static integer nrowa; static logical upper; extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj, nounit; + static logical nounit; /* Purpose ======= - CTRMM performs one of the matrix-matrix operations + DTRMM performs one of the matrix-matrix operations - B := alpha*op( A )*B, or B := alpha*B*op( A ) + B := alpha*op( A )*B, or B := alpha*B*op( A ), where alpha is a scalar, B is an m by n matrix, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of - op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). + op( A ) = A or op( A ) = A'. Parameters ========== @@ -4026,7 +2767,7 @@ L20: TRANSA = 'T' or 't' op( A ) = A'. - TRANSA = 'C' or 'c' op( A ) = conjg( A' ). + TRANSA = 'C' or 'c' op( A ) = A'. Unchanged on exit. 
@@ -4051,13 +2792,13 @@ L20: at least zero. Unchanged on exit. - ALPHA - COMPLEX . + ALPHA - DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. Unchanged on exit. - A - COMPLEX array of DIMENSION ( LDA, k ), where k is m + A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. Before entry with UPLO = 'U' or 'u', the leading k by k upper triangular part of the array A must contain the upper @@ -4078,7 +2819,7 @@ L20: then LDA must be at least max( 1, n ). Unchanged on exit. - B - COMPLEX array of DIMENSION ( LDB, n ). + B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). Before entry, the leading m by n part of the array B must contain the matrix B, and on exit is overwritten by the transformed matrix. @@ -4117,7 +2858,6 @@ L20: } else { nrowa = *n; } - noconj = lsame_(transa, "T"); nounit = lsame_(diag, "N"); upper = lsame_(uplo, "U"); @@ -4142,7 +2882,7 @@ L20: info = 11; } if (info != 0) { - xerbla_("CTRMM ", &info); + xerbla_("DTRMM ", &info); return 0; } @@ -4154,13 +2894,12 @@ L20: /* And when alpha.eq.zero. */ - if (alpha->r == 0.f && alpha->i == 0.f) { + if (*alpha == 0.) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - b[i__3].r = 0.f, b[i__3].i = 0.f; + b[i__ + j * b_dim1] = 0.; /* L10: */ } /* L20: */ @@ -4180,35 +2919,18 @@ L20: for (j = 1; j <= i__1; ++j) { i__2 = *m; for (k = 1; k <= i__2; ++k) { - i__3 = k + j * b_dim1; - if ((b[i__3].r != 0.f) || (b[i__3].i != 0.f)) { - i__3 = k + j * b_dim1; - q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] - .i, q__1.i = alpha->r * b[i__3].i + - alpha->i * b[i__3].r; - temp.r = q__1.r, temp.i = q__1.i; + if (b[k + j * b_dim1] != 0.) 
{ + temp = *alpha * b[k + j * b_dim1]; i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = i__ + k * a_dim1; - q__2.r = temp.r * a[i__6].r - temp.i * a[i__6] - .i, q__2.i = temp.r * a[i__6].i + - temp.i * a[i__6].r; - q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] - .i + q__2.i; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; /* L30: */ } if (nounit) { - i__3 = k + k * a_dim1; - q__1.r = temp.r * a[i__3].r - temp.i * a[i__3] - .i, q__1.i = temp.r * a[i__3].i + - temp.i * a[i__3].r; - temp.r = q__1.r, temp.i = q__1.i; + temp *= a[k + k * a_dim1]; } - i__3 = k + j * b_dim1; - b[i__3].r = temp.r, b[i__3].i = temp.i; + b[k + j * b_dim1] = temp; } /* L40: */ } @@ -4218,35 +2940,16 @@ L20: i__1 = *n; for (j = 1; j <= i__1; ++j) { for (k = *m; k >= 1; --k) { - i__2 = k + j * b_dim1; - if ((b[i__2].r != 0.f) || (b[i__2].i != 0.f)) { - i__2 = k + j * b_dim1; - q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2] - .i, q__1.i = alpha->r * b[i__2].i + - alpha->i * b[i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - i__2 = k + j * b_dim1; - b[i__2].r = temp.r, b[i__2].i = temp.i; + if (b[k + j * b_dim1] != 0.) 
{ + temp = *alpha * b[k + j * b_dim1]; + b[k + j * b_dim1] = temp; if (nounit) { - i__2 = k + j * b_dim1; - i__3 = k + j * b_dim1; - i__4 = k + k * a_dim1; - q__1.r = b[i__3].r * a[i__4].r - b[i__3].i * - a[i__4].i, q__1.i = b[i__3].r * a[ - i__4].i + b[i__3].i * a[i__4].r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; + b[k + j * b_dim1] *= a[k + k * a_dim1]; } i__2 = *m; for (i__ = k + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = i__ + k * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5] - .i, q__2.i = temp.r * a[i__5].i + - temp.i * a[i__5].r; - q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] - .i + q__2.i; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; /* L60: */ } } @@ -4257,120 +2960,44 @@ L20: } } else { -/* Form B := alpha*A'*B or B := alpha*conjg( A' )*B. */ +/* Form B := alpha*A'*B. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { - i__2 = i__ + j * b_dim1; - temp.r = b[i__2].r, temp.i = b[i__2].i; - if (noconj) { - if (nounit) { - i__2 = i__ + i__ * a_dim1; - q__1.r = temp.r * a[i__2].r - temp.i * a[i__2] - .i, q__1.i = temp.r * a[i__2].i + - temp.i * a[i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = i__ - 1; - for (k = 1; k <= i__2; ++k) { - i__3 = k + i__ * a_dim1; - i__4 = k + j * b_dim1; - q__2.r = a[i__3].r * b[i__4].r - a[i__3].i * - b[i__4].i, q__2.i = a[i__3].r * b[ - i__4].i + a[i__3].i * b[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L90: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = i__ - 1; - for (k = 1; k <= i__2; ++k) { - r_cnjg(&q__3, &a[k + i__ * a_dim1]); - i__3 = k + j * b_dim1; - q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3] - .i, q__2.i = q__3.r * b[i__3].i + - 
q__3.i * b[i__3].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L100: */ - } + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; } - i__2 = i__ + j * b_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = alpha->r * temp.i + alpha->i * - temp.r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L110: */ + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L90: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L100: */ } -/* L120: */ +/* L110: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - temp.r = b[i__3].r, temp.i = b[i__3].i; - if (noconj) { - if (nounit) { - i__3 = i__ + i__ * a_dim1; - q__1.r = temp.r * a[i__3].r - temp.i * a[i__3] - .i, q__1.i = temp.r * a[i__3].i + - temp.i * a[i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__3 = *m; - for (k = i__ + 1; k <= i__3; ++k) { - i__4 = k + i__ * a_dim1; - i__5 = k + j * b_dim1; - q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * - b[i__5].i, q__2.i = a[i__4].r * b[ - i__5].i + a[i__4].i * b[i__5].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L130: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__3 = *m; - for (k = i__ + 1; k <= i__3; ++k) { - r_cnjg(&q__3, &a[k + i__ * a_dim1]); - i__4 = k + j * b_dim1; - q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4] - .i, q__2.i = q__3.r * b[i__4].i + - q__3.i * b[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L140: */ - } + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; } - i__3 = i__ + j * b_dim1; - q__1.r = alpha->r * temp.r - alpha->i * temp.i, - q__1.i = 
alpha->r * temp.i + alpha->i * - temp.r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L150: */ + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L120: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L130: */ } -/* L160: */ +/* L140: */ } } } @@ -4381,234 +3008,118 @@ L20: if (upper) { for (j = *n; j >= 1; --j) { - temp.r = alpha->r, temp.i = alpha->i; + temp = *alpha; if (nounit) { - i__1 = j + j * a_dim1; - q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, - q__1.i = temp.r * a[i__1].i + temp.i * a[i__1] - .r; - temp.r = q__1.r, temp.i = q__1.i; + temp *= a[j + j * a_dim1]; } i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * b_dim1; - i__3 = i__ + j * b_dim1; - q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, - q__1.i = temp.r * b[i__3].i + temp.i * b[i__3] - .r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L170: */ + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L150: */ } i__1 = j - 1; for (k = 1; k <= i__1; ++k) { - i__2 = k + j * a_dim1; - if ((a[i__2].r != 0.f) || (a[i__2].i != 0.f)) { - i__2 = k + j * a_dim1; - q__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2] - .i, q__1.i = alpha->r * a[i__2].i + - alpha->i * a[i__2].r; - temp.r = q__1.r, temp.i = q__1.i; + if (a[k + j * a_dim1] != 0.) 
{ + temp = *alpha * a[k + j * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = i__ + k * b_dim1; - q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] - .i, q__2.i = temp.r * b[i__5].i + - temp.i * b[i__5].r; - q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] - .i + q__2.i; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L180: */ + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L160: */ } } -/* L190: */ +/* L170: */ } -/* L200: */ +/* L180: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - temp.r = alpha->r, temp.i = alpha->i; + temp = *alpha; if (nounit) { - i__2 = j + j * a_dim1; - q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - q__1.i = temp.r * a[i__2].i + temp.i * a[i__2] - .r; - temp.r = q__1.r, temp.i = q__1.i; + temp *= a[j + j * a_dim1]; } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, - q__1.i = temp.r * b[i__4].i + temp.i * b[i__4] - .r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L210: */ + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L190: */ } i__2 = *n; for (k = j + 1; k <= i__2; ++k) { - i__3 = k + j * a_dim1; - if ((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) { - i__3 = k + j * a_dim1; - q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3] - .i, q__1.i = alpha->r * a[i__3].i + - alpha->i * a[i__3].r; - temp.r = q__1.r, temp.i = q__1.i; + if (a[k + j * a_dim1] != 0.) 
{ + temp = *alpha * a[k + j * a_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = i__ + k * b_dim1; - q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] - .i, q__2.i = temp.r * b[i__6].i + - temp.i * b[i__6].r; - q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] - .i + q__2.i; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; -/* L220: */ + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L200: */ } } -/* L230: */ +/* L210: */ } -/* L240: */ +/* L220: */ } } } else { -/* Form B := alpha*B*A' or B := alpha*B*conjg( A' ). */ +/* Form B := alpha*B*A'. */ if (upper) { i__1 = *n; for (k = 1; k <= i__1; ++k) { i__2 = k - 1; for (j = 1; j <= i__2; ++j) { - i__3 = j + k * a_dim1; - if ((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) { - if (noconj) { - i__3 = j + k * a_dim1; - q__1.r = alpha->r * a[i__3].r - alpha->i * a[ - i__3].i, q__1.i = alpha->r * a[i__3] - .i + alpha->i * a[i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - } else { - r_cnjg(&q__2, &a[j + k * a_dim1]); - q__1.r = alpha->r * q__2.r - alpha->i * - q__2.i, q__1.i = alpha->r * q__2.i + - alpha->i * q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } + if (a[j + k * a_dim1] != 0.) 
{ + temp = *alpha * a[j + k * a_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = i__ + k * b_dim1; - q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] - .i, q__2.i = temp.r * b[i__6].i + - temp.i * b[i__6].r; - q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] - .i + q__2.i; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; -/* L250: */ + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L230: */ } } -/* L260: */ +/* L240: */ } - temp.r = alpha->r, temp.i = alpha->i; + temp = *alpha; if (nounit) { - if (noconj) { - i__2 = k + k * a_dim1; - q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - q__1.i = temp.r * a[i__2].i + temp.i * a[ - i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - } else { - r_cnjg(&q__2, &a[k + k * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } + temp *= a[k + k * a_dim1]; } - if ((temp.r != 1.f) || (temp.i != 0.f)) { + if (temp != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + k * b_dim1; - i__4 = i__ + k * b_dim1; - q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, - q__1.i = temp.r * b[i__4].i + temp.i * b[ - i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L270: */ + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L250: */ } } -/* L280: */ +/* L260: */ } } else { for (k = *n; k >= 1; --k) { i__1 = *n; for (j = k + 1; j <= i__1; ++j) { - i__2 = j + k * a_dim1; - if ((a[i__2].r != 0.f) || (a[i__2].i != 0.f)) { - if (noconj) { - i__2 = j + k * a_dim1; - q__1.r = alpha->r * a[i__2].r - alpha->i * a[ - i__2].i, q__1.i = alpha->r * a[i__2] - .i + alpha->i * a[i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - } else { - r_cnjg(&q__2, &a[j + k * a_dim1]); - q__1.r = alpha->r * q__2.r - alpha->i * - q__2.i, q__1.i = alpha->r * q__2.i + - alpha->i * q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } + if (a[j + k * a_dim1] != 0.) 
{ + temp = *alpha * a[j + k * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = i__ + k * b_dim1; - q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] - .i, q__2.i = temp.r * b[i__5].i + - temp.i * b[i__5].r; - q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] - .i + q__2.i; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L290: */ + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L270: */ } } -/* L300: */ +/* L280: */ } - temp.r = alpha->r, temp.i = alpha->i; + temp = *alpha; if (nounit) { - if (noconj) { - i__1 = k + k * a_dim1; - q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, - q__1.i = temp.r * a[i__1].i + temp.i * a[ - i__1].r; - temp.r = q__1.r, temp.i = q__1.i; - } else { - r_cnjg(&q__2, &a[k + k * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } + temp *= a[k + k * a_dim1]; } - if ((temp.r != 1.f) || (temp.i != 0.f)) { + if (temp != 1.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + k * b_dim1; - i__3 = i__ + k * b_dim1; - q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, - q__1.i = temp.r * b[i__3].i + temp.i * b[ - i__3].r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L310: */ + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L290: */ } } -/* L320: */ +/* L300: */ } } } @@ -4616,35 +3127,31 @@ L20: return 0; -/* End of CTRMM . */ +/* End of DTRMM . 
*/ -} /* ctrmm_ */ +} /* dtrmm_ */ -/* Subroutine */ int ctrmv_(char *uplo, char *trans, char *diag, integer *n, - complex *a, integer *lda, complex *x, integer *incx) +/* Subroutine */ int dtrmv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *x, integer *incx) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); + integer a_dim1, a_offset, i__1, i__2; /* Local variables */ static integer i__, j, ix, jx, kx, info; - static complex temp; + static doublereal temp; extern logical lsame_(char *, char *); extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj, nounit; + static logical nounit; /* Purpose ======= - CTRMV performs one of the matrix-vector operations + DTRMV performs one of the matrix-vector operations - x := A*x, or x := A'*x, or x := conjg( A' )*x, + x := A*x, or x := A'*x, where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix. @@ -4670,7 +3177,7 @@ L20: TRANS = 'T' or 't' x := A'*x. - TRANS = 'C' or 'c' x := conjg( A' )*x. + TRANS = 'C' or 'c' x := A'*x. Unchanged on exit. @@ -4690,7 +3197,7 @@ L20: N must be at least zero. Unchanged on exit. - A - COMPLEX array of DIMENSION ( LDA, n ). + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of @@ -4709,7 +3216,7 @@ L20: max( 1, n ). Unchanged on exit. - X - COMPLEX array of dimension at least + X - DOUBLE PRECISION array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. 
On exit, X is overwritten with the @@ -4757,7 +3264,7 @@ L20: info = 8; } if (info != 0) { - xerbla_("CTRMV ", &info); + xerbla_("DTRMV ", &info); return 0; } @@ -4767,7 +3274,6 @@ L20: return 0; } - noconj = lsame_(trans, "T"); nounit = lsame_(diag, "N"); /* @@ -4794,31 +3300,15 @@ L20: if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j; - if ((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) { - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; + if (x[j] != 0.) { + temp = x[j]; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + - q__2.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; + x[i__] += temp * a[i__ + j * a_dim1]; /* L10: */ } if (nounit) { - i__2 = j; - i__3 = j; - i__4 = j + j * a_dim1; - q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ - i__4].i, q__1.i = x[i__3].r * a[i__4].i + - x[i__3].i * a[i__4].r; - x[i__2].r = q__1.r, x[i__2].i = q__1.i; + x[j] *= a[j + j * a_dim1]; } } /* L20: */ @@ -4827,33 +3317,17 @@ L20: jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = jx; - if ((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) { - i__2 = jx; - temp.r = x[i__2].r, temp.i = x[i__2].i; + if (x[jx] != 0.) 
{ + temp = x[jx]; ix = kx; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = ix; - i__4 = ix; - i__5 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + - q__2.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; + x[ix] += temp * a[i__ + j * a_dim1]; ix += *incx; /* L30: */ } if (nounit) { - i__2 = jx; - i__3 = jx; - i__4 = j + j * a_dim1; - q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ - i__4].i, q__1.i = x[i__3].r * a[i__4].i + - x[i__3].i * a[i__4].r; - x[i__2].r = q__1.r, x[i__2].i = q__1.i; + x[jx] *= a[j + j * a_dim1]; } } jx += *incx; @@ -4863,31 +3337,15 @@ L20: } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { - i__1 = j; - if ((x[i__1].r != 0.f) || (x[i__1].i != 0.f)) { - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; + if (x[j] != 0.) { + temp = x[j]; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { - i__2 = i__; - i__3 = i__; - i__4 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, - q__2.i = temp.r * a[i__4].i + temp.i * a[ - i__4].r; - q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + - q__2.i; - x[i__2].r = q__1.r, x[i__2].i = q__1.i; + x[i__] += temp * a[i__ + j * a_dim1]; /* L50: */ } if (nounit) { - i__1 = j; - i__2 = j; - i__3 = j + j * a_dim1; - q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ - i__3].i, q__1.i = x[i__2].r * a[i__3].i + - x[i__2].i * a[i__3].r; - x[i__1].r = q__1.r, x[i__1].i = q__1.i; + x[j] *= a[j + j * a_dim1]; } } /* L60: */ @@ -4896,33 +3354,17 @@ L20: kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { - i__1 = jx; - if ((x[i__1].r != 0.f) || (x[i__1].i != 0.f)) { - i__1 = jx; - temp.r = x[i__1].r, temp.i = x[i__1].i; + if (x[jx] != 0.) 
{ + temp = x[jx]; ix = kx; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { - i__2 = ix; - i__3 = ix; - i__4 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, - q__2.i = temp.r * a[i__4].i + temp.i * a[ - i__4].r; - q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + - q__2.i; - x[i__2].r = q__1.r, x[i__2].i = q__1.i; + x[ix] += temp * a[i__ + j * a_dim1]; ix -= *incx; /* L70: */ } if (nounit) { - i__1 = jx; - i__2 = jx; - i__3 = j + j * a_dim1; - q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ - i__3].i, q__1.i = x[i__2].r * a[i__3].i + - x[i__2].i * a[i__3].r; - x[i__1].r = q__1.r, x[i__1].i = q__1.i; + x[jx] *= a[j + j * a_dim1]; } } jx -= *incx; @@ -4932,214 +3374,74 @@ L20: } } else { -/* Form x := A'*x or x := conjg( A' )*x. */ +/* Form x := A'*x. */ if (lsame_(uplo, "U")) { if (*incx == 1) { for (j = *n; j >= 1; --j) { - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - if (noconj) { - if (nounit) { - i__1 = j + j * a_dim1; - q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, - q__1.i = temp.r * a[i__1].i + temp.i * a[ - i__1].r; - temp.r = q__1.r, temp.i = q__1.i; - } - for (i__ = j - 1; i__ >= 1; --i__) { - i__1 = i__ + j * a_dim1; - i__2 = i__; - q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ - i__2].i, q__2.i = a[i__1].r * x[i__2].i + - a[i__1].i * x[i__2].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + temp += a[i__ + j * a_dim1] * x[i__]; /* L90: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - for (i__ = j - 1; i__ >= 1; --i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__1 = i__; - q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, - q__2.i = q__3.r * x[i__1].i + q__3.i * x[ - i__1].r; - q__1.r = temp.r + q__2.r, q__1.i 
= temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L100: */ - } } - i__1 = j; - x[i__1].r = temp.r, x[i__1].i = temp.i; -/* L110: */ + x[j] = temp; +/* L100: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { - i__1 = jx; - temp.r = x[i__1].r, temp.i = x[i__1].i; + temp = x[jx]; ix = jx; - if (noconj) { - if (nounit) { - i__1 = j + j * a_dim1; - q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, - q__1.i = temp.r * a[i__1].i + temp.i * a[ - i__1].r; - temp.r = q__1.r, temp.i = q__1.i; - } - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - i__1 = i__ + j * a_dim1; - i__2 = ix; - q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ - i__2].i, q__2.i = a[i__1].r * x[i__2].i + - a[i__1].i * x[i__2].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L120: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__1 = ix; - q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, - q__2.i = q__3.r * x[i__1].i + q__3.i * x[ - i__1].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L130: */ - } + if (nounit) { + temp *= a[j + j * a_dim1]; } - i__1 = jx; - x[i__1].r = temp.r, x[i__1].i = temp.i; + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + x[jx] = temp; jx -= *incx; -/* L140: */ +/* L120: */ } } } else { if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - if (noconj) { - if (nounit) { - i__2 = j + j * a_dim1; - q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - q__1.i = temp.r * a[i__2].i + temp.i * a[ - i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = *n; - for (i__ = j + 1; i__ 
<= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__; - q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ - i__4].i, q__2.i = a[i__3].r * x[i__4].i + - a[i__3].i * x[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L150: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, - q__2.i = q__3.r * x[i__3].i + q__3.i * x[ - i__3].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L160: */ - } + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; } - i__2 = j; - x[i__2].r = temp.r, x[i__2].i = temp.i; -/* L170: */ + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + x[j] = temp; +/* L140: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = jx; - temp.r = x[i__2].r, temp.i = x[i__2].i; + temp = x[jx]; ix = jx; - if (noconj) { - if (nounit) { - i__2 = j + j * a_dim1; - q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, - q__1.i = temp.r * a[i__2].i + temp.i * a[ - i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - i__3 = i__ + j * a_dim1; - i__4 = ix; - q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ - i__4].i, q__2.i = a[i__3].r * x[i__4].i + - a[i__3].i * x[i__4].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L180: */ - } - } else { - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - q__1.r = temp.r * q__2.r - temp.i * q__2.i, - q__1.i = temp.r * q__2.i + temp.i * - q__2.r; - temp.r = q__1.r, temp.i = q__1.i; - } - i__2 = *n; - for (i__ = j + 1; i__ <= 
i__2; ++i__) { - ix += *incx; - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, - q__2.i = q__3.r * x[i__3].i + q__3.i * x[ - i__3].r; - q__1.r = temp.r + q__2.r, q__1.i = temp.i + - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L190: */ - } + if (nounit) { + temp *= a[j + j * a_dim1]; } - i__2 = jx; - x[i__2].r = temp.r, x[i__2].i = temp.i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L150: */ + } + x[jx] = temp; jx += *incx; -/* L200: */ +/* L160: */ } } } @@ -5147,45 +3449,40 @@ L20: return 0; -/* End of CTRMV . */ +/* End of DTRMV . */ -} /* ctrmv_ */ +} /* dtrmv_ */ -/* Subroutine */ int ctrsm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, complex *alpha, complex *a, integer *lda, - complex *b, integer *ldb) +/* Subroutine */ int dtrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb) { /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, - i__6, i__7; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *); + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, j, k, info; - static complex temp; - extern logical lsame_(char *, char *); + static doublereal temp; static logical lside; + extern logical lsame_(char *, char *); static integer nrowa; static logical upper; extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj, nounit; + static logical nounit; /* Purpose ======= - CTRSM solves one of the matrix equations + DTRSM solves one of the matrix equations op( A )*X = alpha*B, or X*op( A ) = alpha*B, where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower 
triangular matrix and op( A ) is one of - op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). + op( A ) = A or op( A ) = A'. The matrix X is overwritten on B. @@ -5220,7 +3517,7 @@ L20: TRANSA = 'T' or 't' op( A ) = A'. - TRANSA = 'C' or 'c' op( A ) = conjg( A' ). + TRANSA = 'C' or 'c' op( A ) = A'. Unchanged on exit. @@ -5245,13 +3542,13 @@ L20: at least zero. Unchanged on exit. - ALPHA - COMPLEX . + ALPHA - DOUBLE PRECISION. On entry, ALPHA specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. Unchanged on exit. - A - COMPLEX array of DIMENSION ( LDA, k ), where k is m + A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. Before entry with UPLO = 'U' or 'u', the leading k by k upper triangular part of the array A must contain the upper @@ -5272,7 +3569,7 @@ L20: then LDA must be at least max( 1, n ). Unchanged on exit. - B - COMPLEX array of DIMENSION ( LDB, n ). + B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B, and on exit is overwritten by the solution matrix X. @@ -5286,6 +3583,7 @@ L20: Level 3 Blas routine. + -- Written on 8-February-1989. Jack Dongarra, Argonne National Laboratory. Iain Duff, AERE Harwell. @@ -5311,7 +3609,6 @@ L20: } else { nrowa = *n; } - noconj = lsame_(transa, "T"); nounit = lsame_(diag, "N"); upper = lsame_(uplo, "U"); @@ -5336,7 +3633,7 @@ L20: info = 11; } if (info != 0) { - xerbla_("CTRSM ", &info); + xerbla_("DTRSM ", &info); return 0; } @@ -5348,13 +3645,12 @@ L20: /* And when alpha.eq.zero. */ - if (alpha->r == 0.f && alpha->i == 0.f) { + if (*alpha == 0.) 
{ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - b[i__3].r = 0.f, b[i__3].i = 0.f; + b[i__ + j * b_dim1] = 0.; /* L10: */ } /* L20: */ @@ -5372,39 +3668,23 @@ L20: if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if ((alpha->r != 1.f) || (alpha->i != 0.f)) { + if (*alpha != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] - .i, q__1.i = alpha->r * b[i__4].i + - alpha->i * b[i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; /* L30: */ } } for (k = *m; k >= 1; --k) { - i__2 = k + j * b_dim1; - if ((b[i__2].r != 0.f) || (b[i__2].i != 0.f)) { + if (b[k + j * b_dim1] != 0.) { if (nounit) { - i__2 = k + j * b_dim1; - c_div(&q__1, &b[k + j * b_dim1], &a[k + k * - a_dim1]); - b[i__2].r = q__1.r, b[i__2].i = q__1.i; + b[k + j * b_dim1] /= a[k + k * a_dim1]; } i__2 = k - 1; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = k + j * b_dim1; - i__6 = i__ + k * a_dim1; - q__2.r = b[i__5].r * a[i__6].r - b[i__5].i * - a[i__6].i, q__2.i = b[i__5].r * a[ - i__6].i + b[i__5].i * a[i__6].r; - q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] - .i - q__2.i; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; /* L40: */ } } @@ -5415,40 +3695,24 @@ L20: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if ((alpha->r != 1.f) || (alpha->i != 0.f)) { + if (*alpha != 1.) 
{ i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] - .i, q__1.i = alpha->r * b[i__4].i + - alpha->i * b[i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; /* L70: */ } } i__2 = *m; for (k = 1; k <= i__2; ++k) { - i__3 = k + j * b_dim1; - if ((b[i__3].r != 0.f) || (b[i__3].i != 0.f)) { + if (b[k + j * b_dim1] != 0.) { if (nounit) { - i__3 = k + j * b_dim1; - c_div(&q__1, &b[k + j * b_dim1], &a[k + k * - a_dim1]); - b[i__3].r = q__1.r, b[i__3].i = q__1.i; + b[k + j * b_dim1] /= a[k + k * a_dim1]; } i__3 = *m; for (i__ = k + 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = k + j * b_dim1; - i__7 = i__ + k * a_dim1; - q__2.r = b[i__6].r * a[i__7].r - b[i__6].i * - a[i__7].i, q__2.i = b[i__6].r * a[ - i__7].i + b[i__6].i * a[i__7].r; - q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] - .i - q__2.i; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; /* L80: */ } } @@ -5459,113 +3723,44 @@ L20: } } else { -/* - Form B := alpha*inv( A' )*B - or B := alpha*inv( conjg( A' ) )*B. -*/ +/* Form B := alpha*inv( A' )*B. 
*/ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, - q__1.i = alpha->r * b[i__3].i + alpha->i * b[ - i__3].r; - temp.r = q__1.r, temp.i = q__1.i; - if (noconj) { - i__3 = i__ - 1; - for (k = 1; k <= i__3; ++k) { - i__4 = k + i__ * a_dim1; - i__5 = k + j * b_dim1; - q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * - b[i__5].i, q__2.i = a[i__4].r * b[ - i__5].i + a[i__4].i * b[i__5].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; + temp = *alpha * b[i__ + j * b_dim1]; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; /* L110: */ - } - if (nounit) { - c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - } else { - i__3 = i__ - 1; - for (k = 1; k <= i__3; ++k) { - r_cnjg(&q__3, &a[k + i__ * a_dim1]); - i__4 = k + j * b_dim1; - q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4] - .i, q__2.i = q__3.r * b[i__4].i + - q__3.i * b[i__4].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L120: */ - } - if (nounit) { - r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); - c_div(&q__1, &temp, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } } - i__3 = i__ + j * b_dim1; - b[i__3].r = temp.r, b[i__3].i = temp.i; -/* L130: */ - } -/* L140: */ - } - } else { + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L120: */ + } +/* L130: */ + } + } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { - i__2 = i__ + j * b_dim1; - q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, - q__1.i = alpha->r * b[i__2].i + alpha->i * b[ - i__2].r; - temp.r = q__1.r, temp.i = q__1.i; - if (noconj) { - i__2 = *m; - for (k = i__ + 1; k <= i__2; ++k) { - i__3 = k + i__ * a_dim1; - i__4 = k + j * b_dim1; - q__2.r = a[i__3].r * b[i__4].r - a[i__3].i 
* - b[i__4].i, q__2.i = a[i__3].r * b[ - i__4].i + a[i__3].i * b[i__4].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L150: */ - } - if (nounit) { - c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - } else { - i__2 = *m; - for (k = i__ + 1; k <= i__2; ++k) { - r_cnjg(&q__3, &a[k + i__ * a_dim1]); - i__3 = k + j * b_dim1; - q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3] - .i, q__2.i = q__3.r * b[i__3].i + - q__3.i * b[i__3].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L160: */ - } - if (nounit) { - r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); - c_div(&q__1, &temp, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } + temp = *alpha * b[i__ + j * b_dim1]; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L140: */ } - i__2 = i__ + j * b_dim1; - b[i__2].r = temp.r, b[i__2].i = temp.i; -/* L170: */ + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L150: */ } -/* L180: */ +/* L160: */ } } } @@ -5577,240 +3772,139 @@ L20: if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if ((alpha->r != 1.f) || (alpha->i != 0.f)) { + if (*alpha != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] - .i, q__1.i = alpha->r * b[i__4].i + - alpha->i * b[i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L190: */ + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L170: */ } } i__2 = j - 1; for (k = 1; k <= i__2; ++k) { - i__3 = k + j * a_dim1; - if ((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) { + if (a[k + j * a_dim1] != 0.) 
{ i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = k + j * a_dim1; - i__7 = i__ + k * b_dim1; - q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * - b[i__7].i, q__2.i = a[i__6].r * b[ - i__7].i + a[i__6].i * b[i__7].r; - q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] - .i - q__2.i; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; -/* L200: */ + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L180: */ } } -/* L210: */ +/* L190: */ } if (nounit) { - c_div(&q__1, &c_b21, &a[j + j * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; + temp = 1. / a[j + j * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, - q__1.i = temp.r * b[i__4].i + temp.i * b[ - i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L220: */ + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L200: */ } } -/* L230: */ +/* L210: */ } } else { for (j = *n; j >= 1; --j) { - if ((alpha->r != 1.f) || (alpha->i != 0.f)) { + if (*alpha != 1.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * b_dim1; - i__3 = i__ + j * b_dim1; - q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] - .i, q__1.i = alpha->r * b[i__3].i + - alpha->i * b[i__3].r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L240: */ + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L220: */ } } i__1 = *n; for (k = j + 1; k <= i__1; ++k) { - i__2 = k + j * a_dim1; - if ((a[i__2].r != 0.f) || (a[i__2].i != 0.f)) { + if (a[k + j * a_dim1] != 0.) 
{ i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = k + j * a_dim1; - i__6 = i__ + k * b_dim1; - q__2.r = a[i__5].r * b[i__6].r - a[i__5].i * - b[i__6].i, q__2.i = a[i__5].r * b[ - i__6].i + a[i__5].i * b[i__6].r; - q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] - .i - q__2.i; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L250: */ + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L230: */ } } -/* L260: */ +/* L240: */ } if (nounit) { - c_div(&q__1, &c_b21, &a[j + j * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; + temp = 1. / a[j + j * a_dim1]; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * b_dim1; - i__3 = i__ + j * b_dim1; - q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, - q__1.i = temp.r * b[i__3].i + temp.i * b[ - i__3].r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L270: */ + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L250: */ } } -/* L280: */ +/* L260: */ } } } else { -/* - Form B := alpha*B*inv( A' ) - or B := alpha*B*inv( conjg( A' ) ). -*/ +/* Form B := alpha*B*inv( A' ). */ if (upper) { for (k = *n; k >= 1; --k) { if (nounit) { - if (noconj) { - c_div(&q__1, &c_b21, &a[k + k * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } else { - r_cnjg(&q__2, &a[k + k * a_dim1]); - c_div(&q__1, &c_b21, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } + temp = 1. 
/ a[k + k * a_dim1]; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + k * b_dim1; - i__3 = i__ + k * b_dim1; - q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, - q__1.i = temp.r * b[i__3].i + temp.i * b[ - i__3].r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L290: */ + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L270: */ } } i__1 = k - 1; for (j = 1; j <= i__1; ++j) { - i__2 = j + k * a_dim1; - if ((a[i__2].r != 0.f) || (a[i__2].i != 0.f)) { - if (noconj) { - i__2 = j + k * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - } else { - r_cnjg(&q__1, &a[j + k * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * b_dim1; - i__5 = i__ + k * b_dim1; - q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] - .i, q__2.i = temp.r * b[i__5].i + - temp.i * b[i__5].r; - q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] - .i - q__2.i; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L300: */ + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L280: */ } } -/* L310: */ +/* L290: */ } - if ((alpha->r != 1.f) || (alpha->i != 0.f)) { + if (*alpha != 1.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + k * b_dim1; - i__3 = i__ + k * b_dim1; - q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] - .i, q__1.i = alpha->r * b[i__3].i + - alpha->i * b[i__3].r; - b[i__2].r = q__1.r, b[i__2].i = q__1.i; -/* L320: */ + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L300: */ } } -/* L330: */ +/* L310: */ } } else { i__1 = *n; for (k = 1; k <= i__1; ++k) { if (nounit) { - if (noconj) { - c_div(&q__1, &c_b21, &a[k + k * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } else { - r_cnjg(&q__2, &a[k + k * a_dim1]); - c_div(&q__1, &c_b21, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } + temp = 1. 
/ a[k + k * a_dim1]; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + k * b_dim1; - i__4 = i__ + k * b_dim1; - q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, - q__1.i = temp.r * b[i__4].i + temp.i * b[ - i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L340: */ + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L320: */ } } i__2 = *n; for (j = k + 1; j <= i__2; ++j) { - i__3 = j + k * a_dim1; - if ((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) { - if (noconj) { - i__3 = j + k * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - } else { - r_cnjg(&q__1, &a[j + k * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * b_dim1; - i__5 = i__ + j * b_dim1; - i__6 = i__ + k * b_dim1; - q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] - .i, q__2.i = temp.r * b[i__6].i + - temp.i * b[i__6].r; - q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] - .i - q__2.i; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; -/* L350: */ + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L330: */ } } -/* L360: */ +/* L340: */ } - if ((alpha->r != 1.f) || (alpha->i != 0.f)) { + if (*alpha != 1.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + k * b_dim1; - i__4 = i__ + k * b_dim1; - q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] - .i, q__1.i = alpha->r * b[i__4].i + - alpha->i * b[i__4].r; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L370: */ + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L350: */ } } -/* L380: */ +/* L360: */ } } } @@ -5818,8969 +3912,401 @@ L20: return 0; -/* End of CTRSM . */ +/* End of DTRSM . 
*/ -} /* ctrsm_ */ +} /* dtrsm_ */ -/* Subroutine */ int ctrsv_(char *uplo, char *trans, char *diag, integer *n, - complex *a, integer *lda, complex *x, integer *incx) +doublereal dzasum_(integer *n, doublecomplex *zx, integer *incx) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2, q__3; - - /* Builtin functions */ - void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *); + integer i__1; + doublereal ret_val; /* Local variables */ - static integer i__, j, ix, jx, kx, info; - static complex temp; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconj, nounit; + static integer i__, ix; + static doublereal stemp; + extern doublereal dcabs1_(doublecomplex *); /* - Purpose - ======= - - CTRSV solves one of the systems of equations + takes the sum of the absolute values. + jack dongarra, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ - A*x = b, or A'*x = b, or conjg( A' )*x = b, - where b and x are n element vectors and A is an n by n unit, or - non-unit, upper or lower triangular matrix. + /* Parameter adjustments */ + --zx; - No test for singularity or near-singularity is included in this - routine. Such tests must be performed before calling this routine. + /* Function Body */ + ret_val = 0.; + stemp = 0.; + if (*n <= 0 || *incx <= 0) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } - Parameters - ========== +/* code for increment not equal to 1 */ - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix is an upper or - lower triangular matrix as follows: + ix = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += dcabs1_(&zx[ix]); + ix += *incx; +/* L10: */ + } + ret_val = stemp; + return ret_val; - UPLO = 'U' or 'u' A is an upper triangular matrix. 
+/* code for increment equal to 1 */ - UPLO = 'L' or 'l' A is a lower triangular matrix. +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += dcabs1_(&zx[i__]); +/* L30: */ + } + ret_val = stemp; + return ret_val; +} /* dzasum_ */ - Unchanged on exit. +doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal ret_val, d__1; - TRANS - CHARACTER*1. - On entry, TRANS specifies the equations to be solved as - follows: - - TRANS = 'N' or 'n' A*x = b. + /* Builtin functions */ + double d_imag(doublecomplex *), sqrt(doublereal); - TRANS = 'T' or 't' A'*x = b. + /* Local variables */ + static integer ix; + static doublereal ssq, temp, norm, scale; - TRANS = 'C' or 'c' conjg( A' )*x = b. - Unchanged on exit. +/* + DZNRM2 returns the euclidean norm of a vector via the function + name, so that - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit - triangular as follows: + DZNRM2 := sqrt( conjg( x' )*x ) - DIAG = 'U' or 'u' A is assumed to be unit triangular. - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to ZLASSQ. + Sven Hammarling, Nag Ltd. +*/ - Unchanged on exit. - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. + /* Parameter adjustments */ + --x; - A - COMPLEX array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. 
- Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.; + } else { + scale = 0.; + ssq = 1.; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) +*/ - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.) { + i__3 = ix; + temp = (d__1 = x[i__3].r, abs(d__1)); + if (scale < temp) { +/* Computing 2nd power */ + d__1 = scale / temp; + ssq = ssq * (d__1 * d__1) + 1.; + scale = temp; + } else { +/* Computing 2nd power */ + d__1 = temp / scale; + ssq += d__1 * d__1; + } + } + if (d_imag(&x[ix]) != 0.) { + temp = (d__1 = d_imag(&x[ix]), abs(d__1)); + if (scale < temp) { +/* Computing 2nd power */ + d__1 = scale / temp; + ssq = ssq * (d__1 * d__1) + 1.; + scale = temp; + } else { +/* Computing 2nd power */ + d__1 = temp / scale; + ssq += d__1 * d__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } - X - COMPLEX array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element right-hand side vector b. On exit, X is overwritten - with the solution vector x. + ret_val = norm; + return ret_val; - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. +/* End of DZNRM2. */ +} /* dznrm2_ */ - Level 2 Blas routine. +integer idamax_(integer *n, doublereal *dx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + doublereal d__1; - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. 
- Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. + /* Local variables */ + static integer i__, ix; + static doublereal dmax__; - Test the input parameters. +/* + finds the index of element having max. absolute value. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) */ + /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; + --dx; /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*lda < max(1,*n)) { - info = 6; - } else if (*incx == 0) { - info = 8; + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; } - if (info != 0) { - xerbla_("CTRSV ", &info); - return 0; + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; } -/* Quick return if possible. 
*/ +/* code for increment not equal to 1 */ - if (*n == 0) { - return 0; + ix = 1; + dmax__ = abs(dx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((d__1 = dx[ix], abs(d__1)) <= dmax__) { + goto L5; + } + ret_val = i__; + dmax__ = (d__1 = dx[ix], abs(d__1)); +L5: + ix += *incx; +/* L10: */ } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + dmax__ = abs(dx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((d__1 = dx[i__], abs(d__1)) <= dmax__) { + goto L30; + } + ret_val = i__; + dmax__ = (d__1 = dx[i__], abs(d__1)); +L30: + ; + } + return ret_val; +} /* idamax_ */ + +integer izamax_(integer *n, doublecomplex *zx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + + /* Local variables */ + static integer i__, ix; + static doublereal smax; + extern doublereal dcabs1_(doublecomplex *); - noconj = lsame_(trans, "T"); - nounit = lsame_(diag, "N"); /* - Set up the start point in X if the increment is not unity. This - will be ( N - 1 )*INCX too small for descending loops. + finds the index of element having max. absolute value. + jack dongarra, 1/15/85. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) */ - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; + + /* Parameter adjustments */ + --zx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; } -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. 
-*/ +/* code for increment not equal to 1 */ - if (lsame_(trans, "N")) { + ix = 1; + smax = dcabs1_(&zx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[ix]) <= smax) { + goto L5; + } + ret_val = i__; + smax = dcabs1_(&zx[ix]); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; -/* Form x := inv( A )*x. */ +/* code for increment equal to 1 */ - if (lsame_(uplo, "U")) { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - if ((x[i__1].r != 0.f) || (x[i__1].i != 0.f)) { - if (nounit) { - i__1 = j; - c_div(&q__1, &x[j], &a[j + j * a_dim1]); - x[i__1].r = q__1.r, x[i__1].i = q__1.i; - } - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - for (i__ = j - 1; i__ >= 1; --i__) { - i__1 = i__; - i__2 = i__; - i__3 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, - q__2.i = temp.r * a[i__3].i + temp.i * a[ - i__3].r; - q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - - q__2.i; - x[i__1].r = q__1.r, x[i__1].i = q__1.i; -/* L10: */ - } - } -/* L20: */ - } - } else { - jx = kx + (*n - 1) * *incx; - for (j = *n; j >= 1; --j) { - i__1 = jx; - if ((x[i__1].r != 0.f) || (x[i__1].i != 0.f)) { - if (nounit) { - i__1 = jx; - c_div(&q__1, &x[jx], &a[j + j * a_dim1]); - x[i__1].r = q__1.r, x[i__1].i = q__1.i; - } - i__1 = jx; - temp.r = x[i__1].r, temp.i = x[i__1].i; - ix = jx; - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - i__1 = ix; - i__2 = ix; - i__3 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, - q__2.i = temp.r * a[i__3].i + temp.i * a[ - i__3].r; - q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - - q__2.i; - x[i__1].r = q__1.r, x[i__1].i = q__1.i; -/* L30: */ - } - } - jx -= *incx; -/* L40: */ - } - } - } else { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - if ((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) { - if (nounit) { - i__2 = j; - c_div(&q__1, &x[j], &a[j + j * a_dim1]); - x[i__2].r = q__1.r, x[i__2].i = q__1.i; - } - i__2 = j; - 
temp.r = x[i__2].r, temp.i = x[i__2].i; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__; - i__4 = i__; - i__5 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - - q__2.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; -/* L50: */ - } - } -/* L60: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = jx; - if ((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) { - if (nounit) { - i__2 = jx; - c_div(&q__1, &x[jx], &a[j + j * a_dim1]); - x[i__2].r = q__1.r, x[i__2].i = q__1.i; - } - i__2 = jx; - temp.r = x[i__2].r, temp.i = x[i__2].i; - ix = jx; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - i__3 = ix; - i__4 = ix; - i__5 = i__ + j * a_dim1; - q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, - q__2.i = temp.r * a[i__5].i + temp.i * a[ - i__5].r; - q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - - q__2.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; -/* L70: */ - } - } - jx += *incx; -/* L80: */ - } - } - } - } else { - -/* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. 
*/ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - temp.r = x[i__2].r, temp.i = x[i__2].i; - if (noconj) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__; - q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ - i__4].i, q__2.i = a[i__3].r * x[i__4].i + - a[i__3].i * x[i__4].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L90: */ - } - if (nounit) { - c_div(&q__1, &temp, &a[j + j * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - } else { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = i__; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, - q__2.i = q__3.r * x[i__3].i + q__3.i * x[ - i__3].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L100: */ - } - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - c_div(&q__1, &temp, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } - } - i__2 = j; - x[i__2].r = temp.r, x[i__2].i = temp.i; -/* L110: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - ix = kx; - i__2 = jx; - temp.r = x[i__2].r, temp.i = x[i__2].i; - if (noconj) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = ix; - q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ - i__4].i, q__2.i = a[i__3].r * x[i__4].i + - a[i__3].i * x[i__4].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix += *incx; -/* L120: */ - } - if (nounit) { - c_div(&q__1, &temp, &a[j + j * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - } else { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__3 = ix; - q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, - q__2.i = q__3.r * x[i__3].i + q__3.i * x[ - i__3].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = 
q__1.r, temp.i = q__1.i; - ix += *incx; -/* L130: */ - } - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - c_div(&q__1, &temp, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } - } - i__2 = jx; - x[i__2].r = temp.r, x[i__2].i = temp.i; - jx += *incx; -/* L140: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - i__1 = j; - temp.r = x[i__1].r, temp.i = x[i__1].i; - if (noconj) { - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - i__2 = i__ + j * a_dim1; - i__3 = i__; - q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ - i__3].i, q__2.i = a[i__2].r * x[i__3].i + - a[i__2].i * x[i__3].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L150: */ - } - if (nounit) { - c_div(&q__1, &temp, &a[j + j * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - } else { - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__2 = i__; - q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, - q__2.i = q__3.r * x[i__2].i + q__3.i * x[ - i__2].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; -/* L160: */ - } - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - c_div(&q__1, &temp, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } - } - i__1 = j; - x[i__1].r = temp.r, x[i__1].i = temp.i; -/* L170: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - ix = kx; - i__1 = jx; - temp.r = x[i__1].r, temp.i = x[i__1].i; - if (noconj) { - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - i__2 = i__ + j * a_dim1; - i__3 = ix; - q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ - i__3].i, q__2.i = a[i__2].r * x[i__3].i + - a[i__2].i * x[i__3].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix -= *incx; -/* L180: */ - } - if (nounit) { - c_div(&q__1, &temp, &a[j + j * a_dim1]); - temp.r = q__1.r, temp.i = q__1.i; - } - } else { - i__1 = j + 1; - for (i__ = 
*n; i__ >= i__1; --i__) { - r_cnjg(&q__3, &a[i__ + j * a_dim1]); - i__2 = ix; - q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, - q__2.i = q__3.r * x[i__2].i + q__3.i * x[ - i__2].r; - q__1.r = temp.r - q__2.r, q__1.i = temp.i - - q__2.i; - temp.r = q__1.r, temp.i = q__1.i; - ix -= *incx; -/* L190: */ - } - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - c_div(&q__1, &temp, &q__2); - temp.r = q__1.r, temp.i = q__1.i; - } - } - i__1 = jx; - x[i__1].r = temp.r, x[i__1].i = temp.i; - jx -= *incx; -/* L200: */ - } - } - } - } - - return 0; - -/* End of CTRSV . */ - -} /* ctrsv_ */ - -/* Subroutine */ int daxpy_(integer *n, doublereal *da, doublereal *dx, - integer *incx, doublereal *dy, integer *incy) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - - -/* - constant times a vector plus a vector. - uses unrolled loops for increments equal to one. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dy; - --dx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*da == 0.) 
{ - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dy[iy] += *da * dx[ix]; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 4; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - dy[i__] += *da * dx[i__]; -/* L30: */ - } - if (*n < 4) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 4) { - dy[i__] += *da * dx[i__]; - dy[i__ + 1] += *da * dx[i__ + 1]; - dy[i__ + 2] += *da * dx[i__ + 2]; - dy[i__ + 3] += *da * dx[i__ + 3]; -/* L50: */ - } - return 0; -} /* daxpy_ */ - -doublereal dcabs1_(doublecomplex *z__) -{ - /* System generated locals */ - doublereal ret_val; - static doublecomplex equiv_0[1]; - - /* Local variables */ -#define t ((doublereal *)equiv_0) -#define zz (equiv_0) - - zz->r = z__->r, zz->i = z__->i; - ret_val = abs(t[0]) + abs(t[1]); - return ret_val; -} /* dcabs1_ */ - -#undef zz -#undef t - - -/* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx, - doublereal *dy, integer *incy) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - - -/* - copies a vector, x, to a vector, y. - uses unrolled loops for increments equal to one. - jack dongarra, linpack, 3/11/78. 
- modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dy; - --dx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dy[iy] = dx[ix]; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 7; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - dy[i__] = dx[i__]; -/* L30: */ - } - if (*n < 7) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 7) { - dy[i__] = dx[i__]; - dy[i__ + 1] = dx[i__ + 1]; - dy[i__ + 2] = dx[i__ + 2]; - dy[i__ + 3] = dx[i__ + 3]; - dy[i__ + 4] = dx[i__ + 4]; - dy[i__ + 5] = dx[i__ + 5]; - dy[i__ + 6] = dx[i__ + 6]; -/* L50: */ - } - return 0; -} /* dcopy_ */ - -doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, - integer *incy) -{ - /* System generated locals */ - integer i__1; - doublereal ret_val; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - static doublereal dtemp; - - -/* - forms the dot product of two vectors. - uses unrolled loops for increments equal to one. - jack dongarra, linpack, 3/11/78. 
- modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dy; - --dx; - - /* Function Body */ - ret_val = 0.; - dtemp = 0.; - if (*n <= 0) { - return ret_val; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp += dx[ix] * dy[iy]; - ix += *incx; - iy += *incy; -/* L10: */ - } - ret_val = dtemp; - return ret_val; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 5; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp += dx[i__] * dy[i__]; -/* L30: */ - } - if (*n < 5) { - goto L60; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 5) { - dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[ - i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ + - 4] * dy[i__ + 4]; -/* L50: */ - } -L60: - ret_val = dtemp; - return ret_val; -} /* ddot_ */ - -/* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer * - n, integer *k, doublereal *alpha, doublereal *a, integer *lda, - doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, - integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3; - - /* Local variables */ - static integer i__, j, l, info; - static logical nota, notb; - static doublereal temp; - static integer ncola; - extern logical lsame_(char *, char *); - static integer nrowa, nrowb; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DGEMM performs one of the matrix-matrix operations - - C := alpha*op( A )*op( B ) + beta*C, - - where op( X ) is one of - - op( X ) = X or op( X ) = X', - - alpha and 
beta are scalars, and A, B and C are matrices, with op( A ) - an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - - Parameters - ========== - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n', op( A ) = A. - - TRANSA = 'T' or 't', op( A ) = A'. - - TRANSA = 'C' or 'c', op( A ) = A'. - - Unchanged on exit. - - TRANSB - CHARACTER*1. - On entry, TRANSB specifies the form of op( B ) to be used in - the matrix multiplication as follows: - - TRANSB = 'N' or 'n', op( B ) = B. - - TRANSB = 'T' or 't', op( B ) = B'. - - TRANSB = 'C' or 'c', op( B ) = B'. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of the matrix - op( A ) and of the matrix C. M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix - op( B ) and the number of columns of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry, K specifies the number of columns of the matrix - op( A ) and the number of rows of the matrix op( B ). K must - be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is - k when TRANSA = 'N' or 'n', and is m otherwise. - Before entry with TRANSA = 'N' or 'n', the leading m by k - part of the array A must contain the matrix A, otherwise - the leading k by m part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANSA = 'N' or 'n' then - LDA must be at least max( 1, m ), otherwise LDA must be at - least max( 1, k ). - Unchanged on exit. - - B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is - n when TRANSB = 'N' or 'n', and is k otherwise. 
- Before entry with TRANSB = 'N' or 'n', the leading k by n - part of the array B must contain the matrix B, otherwise - the leading n by k part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. When TRANSB = 'N' or 'n' then - LDB must be at least max( 1, k ), otherwise LDB must be at - least max( 1, n ). - Unchanged on exit. - - BETA - DOUBLE PRECISION. - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then C need not be set on input. - Unchanged on exit. - - C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). - Before entry, the leading m by n part of the array C must - contain the matrix C, except when beta is zero, in which - case C need not be set on entry. - On exit, the array C is overwritten by the m by n matrix - ( alpha*op( A )*op( B ) + beta*C ). - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Set NOTA and NOTB as true if A and B respectively are not - transposed and set NROWA, NCOLA and NROWB as the number of rows - and columns of A and the number of rows of B respectively. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - nota = lsame_(transa, "N"); - notb = lsame_(transb, "N"); - if (nota) { - nrowa = *m; - ncola = *k; - } else { - nrowa = *k; - ncola = *m; - } - if (notb) { - nrowb = *k; - } else { - nrowb = *n; - } - -/* Test the input parameters. 
*/ - - info = 0; - if (! nota && ! lsame_(transa, "C") && ! lsame_( - transa, "T")) { - info = 1; - } else if (! notb && ! lsame_(transb, "C") && ! - lsame_(transb, "T")) { - info = 2; - } else if (*m < 0) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < max(1,nrowa)) { - info = 8; - } else if (*ldb < max(1,nrowb)) { - info = 10; - } else if (*ldc < max(1,*m)) { - info = 13; - } - if (info != 0) { - xerbla_("DGEMM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (((*m == 0) || (*n == 0)) || (((*alpha == 0.) || (*k == 0)) && *beta == - 1.)) { - return 0; - } - -/* And if alpha.eq.zero. */ - - if (*alpha == 0.) { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L30: */ - } -/* L40: */ - } - } - return 0; - } - -/* Start the operations. */ - - if (notb) { - if (nota) { - -/* Form C := alpha*A*B + beta*C. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L50: */ - } - } else if (*beta != 1.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L60: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (b[l + j * b_dim1] != 0.) 
{ - temp = *alpha * b[l + j * b_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L70: */ - } - } -/* L80: */ - } -/* L90: */ - } - } else { - -/* Form C := alpha*A'*B + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; -/* L100: */ - } - if (*beta == 0.) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L110: */ - } -/* L120: */ - } - } - } else { - if (nota) { - -/* Form C := alpha*A*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L130: */ - } - } else if (*beta != 1.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L140: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (b[j + l * b_dim1] != 0.) { - temp = *alpha * b[j + l * b_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L150: */ - } - } -/* L160: */ - } -/* L170: */ - } - } else { - -/* Form C := alpha*A'*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; -/* L180: */ - } - if (*beta == 0.) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L190: */ - } -/* L200: */ - } - } - } - - return 0; - -/* End of DGEMM . 
*/ - -} /* dgemm_ */ - -/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal * - alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, - doublereal *beta, doublereal *y, integer *incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static doublereal temp; - static integer lenx, leny; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DGEMV performs one of the matrix-vector operations - - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, - - where alpha and beta are scalars, x and y are vectors and A is an - m by n matrix. - - Parameters - ========== - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' y := alpha*A*x + beta*y. - - TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - - TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. - - X - DOUBLE PRECISION array of DIMENSION at least - ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. - Before entry, the incremented array X must contain the - vector x. 
- Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - BETA - DOUBLE PRECISION. - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - - Y - DOUBLE PRECISION array of DIMENSION at least - ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. - Before entry with BETA non-zero, the incremented array Y - must contain the vector y. On exit, Y is overwritten by the - updated vector y. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") - ) { - info = 1; - } else if (*m < 0) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*lda < max(1,*m)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("DGEMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (((*m == 0) || (*n == 0)) || (*alpha == 0. && *beta == 1.)) { - return 0; - } - -/* - Set LENX and LENY, the lengths of the vectors x and y, and set - up the start points in X and Y. 
-*/ - - if (lsame_(trans, "N")) { - lenx = *n; - leny = *m; - } else { - lenx = *m; - leny = *n; - } - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (lenx - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (leny - 1) * *incy; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. - - First form y := beta*y. -*/ - - if (*beta != 1.) { - if (*incy == 1) { - if (*beta == 0.) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.; -/* L10: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.; - iy += *incy; -/* L30: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.) { - return 0; - } - if (lsame_(trans, "N")) { - -/* Form y := alpha*A*x + y. */ - - jx = kx; - if (*incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.) { - temp = *alpha * x[jx]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp * a[i__ + j * a_dim1]; -/* L50: */ - } - } - jx += *incx; -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.) { - temp = *alpha * x[jx]; - iy = ky; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - y[iy] += temp * a[i__ + j * a_dim1]; - iy += *incy; -/* L70: */ - } - } - jx += *incx; -/* L80: */ - } - } - } else { - -/* Form y := alpha*A'*x + y. 
*/ - - jy = ky; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = 0.; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - y[jy] += *alpha * temp; - jy += *incy; -/* L100: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = 0.; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[ix]; - ix += *incx; -/* L110: */ - } - y[jy] += *alpha * temp; - jy += *incy; -/* L120: */ - } - } - } - - return 0; - -/* End of DGEMV . */ - -} /* dgemv_ */ - -/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, - doublereal *x, integer *incx, doublereal *y, integer *incy, - doublereal *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, jy, kx, info; - static doublereal temp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DGER performs the rank 1 operation - - A := alpha*x*y' + A, - - where alpha is a scalar, x is an m element vector, y is an n element - vector and A is an m by n matrix. - - Parameters - ========== - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - DOUBLE PRECISION array of dimension at least - ( 1 + ( m - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the m - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). 
- Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. On exit, A is - overwritten by the updated matrix. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (*m < 0) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("DGER ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (((*m == 0) || (*n == 0)) || (*alpha == 0.)) { - return 0; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (*incy > 0) { - jy = 1; - } else { - jy = 1 - (*n - 1) * *incy; - } - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.) { - temp = *alpha * y[jy]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[i__] * temp; -/* L10: */ - } - } - jy += *incy; -/* L20: */ - } - } else { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*m - 1) * *incx; - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.) 
{ - temp = *alpha * y[jy]; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[ix] * temp; - ix += *incx; -/* L30: */ - } - } - jy += *incy; -/* L40: */ - } - } - - return 0; - -/* End of DGER . */ - -} /* dger_ */ - -doublereal dnrm2_(integer *n, doublereal *x, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2; - doublereal ret_val, d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer ix; - static doublereal ssq, norm, scale, absxi; - - -/* - DNRM2 returns the euclidean norm of a vector via the function - name, so that - - DNRM2 := sqrt( x'*x ) - - - -- This version written on 25-October-1982. - Modified on 14-October-1993 to inline the call to DLASSQ. - Sven Hammarling, Nag Ltd. -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if ((*n < 1) || (*incx < 1)) { - norm = 0.; - } else if (*n == 1) { - norm = abs(x[1]); - } else { - scale = 0.; - ssq = 1.; -/* - The following loop is equivalent to this call to the LAPACK - auxiliary routine: - CALL DLASSQ( N, X, INCX, SCALE, SSQ ) -*/ - - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - if (x[ix] != 0.) { - absxi = (d__1 = x[ix], abs(d__1)); - if (scale < absxi) { -/* Computing 2nd power */ - d__1 = scale / absxi; - ssq = ssq * (d__1 * d__1) + 1.; - scale = absxi; - } else { -/* Computing 2nd power */ - d__1 = absxi / scale; - ssq += d__1 * d__1; - } - } -/* L10: */ - } - norm = scale * sqrt(ssq); - } - - ret_val = norm; - return ret_val; - -/* End of DNRM2. */ - -} /* dnrm2_ */ - -/* Subroutine */ int drot_(integer *n, doublereal *dx, integer *incx, - doublereal *dy, integer *incy, doublereal *c__, doublereal *s) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, ix, iy; - static doublereal dtemp; - - -/* - applies a plane rotation. - jack dongarra, linpack, 3/11/78. 
- modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dy; - --dx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = *c__ * dx[ix] + *s * dy[iy]; - dy[iy] = *c__ * dy[iy] - *s * dx[ix]; - dx[ix] = dtemp; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = *c__ * dx[i__] + *s * dy[i__]; - dy[i__] = *c__ * dy[i__] - *s * dx[i__]; - dx[i__] = dtemp; -/* L30: */ - } - return 0; -} /* drot_ */ - -/* Subroutine */ int dscal_(integer *n, doublereal *da, doublereal *dx, - integer *incx) -{ - /* System generated locals */ - integer i__1, i__2; - - /* Local variables */ - static integer i__, m, mp1, nincx; - - -/* - scales a vector by a constant. - uses unrolled loops for increment equal to one. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dx; - - /* Function Body */ - if ((*n <= 0) || (*incx <= 0)) { - return 0; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - nincx = *n * *incx; - i__1 = nincx; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { - dx[i__] = *da * dx[i__]; -/* L10: */ - } - return 0; - -/* - code for increment equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 5; - if (m == 0) { - goto L40; - } - i__2 = m; - for (i__ = 1; i__ <= i__2; ++i__) { - dx[i__] = *da * dx[i__]; -/* L30: */ - } - if (*n < 5) { - return 0; - } -L40: - mp1 = m + 1; - i__2 = *n; - for (i__ = mp1; i__ <= i__2; i__ += 5) { - dx[i__] = *da * dx[i__]; - dx[i__ + 1] = *da * dx[i__ + 1]; - dx[i__ + 2] = *da * dx[i__ + 2]; - dx[i__ + 3] = *da * dx[i__ + 3]; - dx[i__ + 4] = *da * dx[i__ + 4]; -/* L50: */ - } - return 0; -} /* dscal_ */ - -/* Subroutine */ int dswap_(integer *n, doublereal *dx, integer *incx, - doublereal *dy, integer *incy) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - static doublereal dtemp; - - -/* - interchanges two vectors. - uses unrolled loops for increments equal one. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dy; - --dx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = dx[ix]; - dx[ix] = dy[iy]; - dy[iy] = dtemp; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 3; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = dx[i__]; - dx[i__] = dy[i__]; - dy[i__] = dtemp; -/* L30: */ - } - if (*n < 3) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 3) { - dtemp = dx[i__]; - dx[i__] = dy[i__]; - dy[i__] = 
dtemp; - dtemp = dx[i__ + 1]; - dx[i__ + 1] = dy[i__ + 1]; - dy[i__ + 1] = dtemp; - dtemp = dx[i__ + 2]; - dx[i__ + 2] = dy[i__ + 2]; - dy[i__ + 2] = dtemp; -/* L50: */ - } - return 0; -} /* dswap_ */ - -/* Subroutine */ int dsymv_(char *uplo, integer *n, doublereal *alpha, - doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal - *beta, doublereal *y, integer *incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static doublereal temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DSYMV performs the matrix-vector operation - - y := alpha*A*x + beta*y, - - where alpha and beta are scalars, x and y are n element vectors and - A is an n by n symmetric matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of A is not referenced. - Unchanged on exit. 
- - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - BETA - DOUBLE PRECISION. - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - - Y - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. On exit, Y is overwritten by the updated - vector y. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*lda < max(1,*n)) { - info = 5; - } else if (*incx == 0) { - info = 7; - } else if (*incy == 0) { - info = 10; - } - if (info != 0) { - xerbla_("DSYMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if ((*n == 0) || (*alpha == 0. && *beta == 1.)) { - return 0; - } - -/* Set up the start points in X and Y. 
*/ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through the triangular part - of A. - - First form y := beta*y. -*/ - - if (*beta != 1.) { - if (*incy == 1) { - if (*beta == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.) { - return 0; - } - if (lsame_(uplo, "U")) { - -/* Form y when A is stored in upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[i__]; -/* L50: */ - } - y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; - ix = kx; - iy = ky; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - y[iy] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[ix]; - ix += *incx; - iy += *incy; -/* L70: */ - } - y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; - jx += *incx; - jy += *incy; -/* L80: */ - } - } - } else { - -/* Form y when A is stored in lower triangle. 
*/ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; - y[j] += temp1 * a[j + j * a_dim1]; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - y[j] += *alpha * temp2; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; - y[jy] += temp1 * a[j + j * a_dim1]; - ix = jx; - iy = jy; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - iy += *incy; - y[iy] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[ix]; -/* L110: */ - } - y[jy] += *alpha * temp2; - jx += *incx; - jy += *incy; -/* L120: */ - } - } - } - - return 0; - -/* End of DSYMV . */ - -} /* dsymv_ */ - -/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha, - doublereal *x, integer *incx, doublereal *y, integer *incy, - doublereal *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static doublereal temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DSYR2 performs the symmetric rank 2 operation - - A := alpha*x*y' + alpha*y*x' + A, - - where alpha is a scalar, x and y are n element vectors and A is an n - by n symmetric matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. 
- - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of A is not referenced. On exit, the - upper triangular part of the array A is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of A is not referenced. On exit, the - lower triangular part of the array A is overwritten by the - lower triangular part of the updated matrix. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*n)) { - info = 9; - } - if (info != 0) { - xerbla_("DSYR2 ", &info); - return 0; - } - -/* Quick return if possible. */ - - if ((*n == 0) || (*alpha == 0.)) { - return 0; - } - -/* - Set up the start points in X and Y if the increments are not both - unity. -*/ - - if ((*incx != 1) || (*incy != 1)) { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - jx = kx; - jy = ky; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through the triangular part - of A. -*/ - - if (lsame_(uplo, "U")) { - -/* Form A when A is stored in the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if ((x[j] != 0.) || (y[j] != 0.)) { - temp1 = *alpha * y[j]; - temp2 = *alpha * x[j]; - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * - temp1 + y[i__] * temp2; -/* L10: */ - } - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if ((x[jx] != 0.) || (y[jy] != 0.)) { - temp1 = *alpha * y[jy]; - temp2 = *alpha * x[jx]; - ix = kx; - iy = ky; - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * - temp1 + y[iy] * temp2; - ix += *incx; - iy += *incy; -/* L30: */ - } - } - jx += *incx; - jy += *incy; -/* L40: */ - } - } - } else { - -/* Form A when A is stored in the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if ((x[j] != 0.) 
|| (y[j] != 0.)) { - temp1 = *alpha * y[j]; - temp2 = *alpha * x[j]; - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * - temp1 + y[i__] * temp2; -/* L50: */ - } - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if ((x[jx] != 0.) || (y[jy] != 0.)) { - temp1 = *alpha * y[jy]; - temp2 = *alpha * x[jx]; - ix = jx; - iy = jy; - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * - temp1 + y[iy] * temp2; - ix += *incx; - iy += *incy; -/* L70: */ - } - } - jx += *incx; - jy += *incy; -/* L80: */ - } - } - } - - return 0; - -/* End of DSYR2 . */ - -} /* dsyr2_ */ - -/* Subroutine */ int dsyr2k_(char *uplo, char *trans, integer *n, integer *k, - doublereal *alpha, doublereal *a, integer *lda, doublereal *b, - integer *ldb, doublereal *beta, doublereal *c__, integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3; - - /* Local variables */ - static integer i__, j, l, info; - static doublereal temp1, temp2; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DSYR2K performs one of the symmetric rank 2k operations - - C := alpha*A*B' + alpha*B*A' + beta*C, - - or - - C := alpha*A'*B + alpha*B'*A + beta*C, - - where alpha and beta are scalars, C is an n by n symmetric matrix - and A and B are n by k matrices in the first case and k by n - matrices in the second case. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array C is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of C - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of C - is to be referenced. - - Unchanged on exit. - - TRANS - CHARACTER*1. 
- On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + - beta*C. - - TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + - beta*C. - - TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + - beta*C. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry with TRANS = 'N' or 'n', K specifies the number - of columns of the matrices A and B, and on entry with - TRANS = 'T' or 't' or 'C' or 'c', K specifies the number - of rows of the matrices A and B. K must be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array A must contain the matrix A, otherwise - the leading k by n part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDA must be at least max( 1, n ), otherwise LDA must - be at least max( 1, k ). - Unchanged on exit. - - B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array B must contain the matrix B, otherwise - the leading k by n part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDB must be at least max( 1, n ), otherwise LDB must - be at least max( 1, k ). - Unchanged on exit. - - BETA - DOUBLE PRECISION. - On entry, BETA specifies the scalar beta. 
- Unchanged on exit. - - C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array C must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of C is not referenced. On exit, the - upper triangular part of the array C is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array C must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of C is not referenced. On exit, the - lower triangular part of the array C is overwritten by the - lower triangular part of the updated matrix. - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, n ). - Unchanged on exit. - - - Level 3 Blas routine. - - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - if (lsame_(trans, "N")) { - nrowa = *n; - } else { - nrowa = *k; - } - upper = lsame_(uplo, "U"); - - info = 0; - if (! upper && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! 
lsame_(trans, "C")) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*k < 0) { - info = 4; - } else if (*lda < max(1,nrowa)) { - info = 7; - } else if (*ldb < max(1,nrowa)) { - info = 9; - } else if (*ldc < max(1,*n)) { - info = 12; - } - if (info != 0) { - xerbla_("DSYR2K", &info); - return 0; - } - -/* Quick return if possible. */ - - if ((*n == 0) || (((*alpha == 0.) || (*k == 0)) && *beta == 1.)) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.) { - if (upper) { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L30: */ - } -/* L40: */ - } - } - } else { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L70: */ - } -/* L80: */ - } - } - } - return 0; - } - -/* Start the operations. */ - - if (lsame_(trans, "N")) { - -/* Form C := alpha*A*B' + alpha*B*A' + C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L90: */ - } - } else if (*beta != 1.) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L100: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if ((a[j + l * a_dim1] != 0.) || (b[j + l * b_dim1] != 0.) 
- ) { - temp1 = *alpha * b[j + l * b_dim1]; - temp2 = *alpha * a[j + l * a_dim1]; - i__3 = j; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ - i__ + l * a_dim1] * temp1 + b[i__ + l * - b_dim1] * temp2; -/* L110: */ - } - } -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L140: */ - } - } else if (*beta != 1.) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L150: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if ((a[j + l * a_dim1] != 0.) || (b[j + l * b_dim1] != 0.) - ) { - temp1 = *alpha * b[j + l * b_dim1]; - temp2 = *alpha * a[j + l * a_dim1]; - i__3 = *n; - for (i__ = j; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ - i__ + l * a_dim1] * temp1 + b[i__ + l * - b_dim1] * temp2; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } - } else { - -/* Form C := alpha*A'*B + alpha*B'*A + C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - temp1 = 0.; - temp2 = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; - temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; -/* L190: */ - } - if (*beta == 0.) { - c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * - temp2; - } else { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] - + *alpha * temp1 + *alpha * temp2; - } -/* L200: */ - } -/* L210: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - temp1 = 0.; - temp2 = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; - temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; -/* L220: */ - } - if (*beta == 0.) 
{ - c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * - temp2; - } else { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] - + *alpha * temp1 + *alpha * temp2; - } -/* L230: */ - } -/* L240: */ - } - } - } - - return 0; - -/* End of DSYR2K. */ - -} /* dsyr2k_ */ - -/* Subroutine */ int dsyrk_(char *uplo, char *trans, integer *n, integer *k, - doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, - doublereal *c__, integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, l, info; - static doublereal temp; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DSYRK performs one of the symmetric rank k operations - - C := alpha*A*A' + beta*C, - - or - - C := alpha*A'*A + beta*C, - - where alpha and beta are scalars, C is an n by n symmetric matrix - and A is an n by k matrix in the first case and a k by n matrix - in the second case. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array C is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of C - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of C - is to be referenced. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. - - TRANS = 'T' or 't' C := alpha*A'*A + beta*C. - - TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. 
- On entry with TRANS = 'N' or 'n', K specifies the number - of columns of the matrix A, and on entry with - TRANS = 'T' or 't' or 'C' or 'c', K specifies the number - of rows of the matrix A. K must be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array A must contain the matrix A, otherwise - the leading k by n part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDA must be at least max( 1, n ), otherwise LDA must - be at least max( 1, k ). - Unchanged on exit. - - BETA - DOUBLE PRECISION. - On entry, BETA specifies the scalar beta. - Unchanged on exit. - - C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array C must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of C is not referenced. On exit, the - upper triangular part of the array C is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array C must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of C is not referenced. On exit, the - lower triangular part of the array C is overwritten by the - lower triangular part of the updated matrix. - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, n ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. 
- Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - if (lsame_(trans, "N")) { - nrowa = *n; - } else { - nrowa = *k; - } - upper = lsame_(uplo, "U"); - - info = 0; - if (! upper && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*k < 0) { - info = 4; - } else if (*lda < max(1,nrowa)) { - info = 7; - } else if (*ldc < max(1,*n)) { - info = 10; - } - if (info != 0) { - xerbla_("DSYRK ", &info); - return 0; - } - -/* Quick return if possible. */ - - if ((*n == 0) || (((*alpha == 0.) || (*k == 0)) && *beta == 1.)) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.) { - if (upper) { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L30: */ - } -/* L40: */ - } - } - } else { - if (*beta == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L70: */ - } -/* L80: */ - } - } - } - return 0; - } - -/* Start the operations. */ - - if (lsame_(trans, "N")) { - -/* Form C := alpha*A*A' + beta*C. 
*/ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L90: */ - } - } else if (*beta != 1.) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L100: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0.) { - temp = *alpha * a[j + l * a_dim1]; - i__3 = j; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L110: */ - } - } -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L140: */ - } - } else if (*beta != 1.) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L150: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0.) { - temp = *alpha * a[j + l * a_dim1]; - i__3 = *n; - for (i__ = j; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } - } else { - -/* Form C := alpha*A'*A + beta*C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; -/* L190: */ - } - if (*beta == 0.) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L200: */ - } -/* L210: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - temp = 0.; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; -/* L220: */ - } - if (*beta == 0.) 
{ - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L230: */ - } -/* L240: */ - } - } - } - - return 0; - -/* End of DSYRK . */ - -} /* dsyrk_ */ - -/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, doublereal *alpha, doublereal *a, integer * - lda, doublereal *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, k, info; - static doublereal temp; - static logical lside; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; - - -/* - Purpose - ======= - - DTRMM performs one of the matrix-matrix operations - - B := alpha*op( A )*B, or B := alpha*B*op( A ), - - where alpha is a scalar, B is an m by n matrix, A is a unit, or - non-unit, upper or lower triangular matrix and op( A ) is one of - - op( A ) = A or op( A ) = A'. - - Parameters - ========== - - SIDE - CHARACTER*1. - On entry, SIDE specifies whether op( A ) multiplies B from - the left or right as follows: - - SIDE = 'L' or 'l' B := alpha*op( A )*B. - - SIDE = 'R' or 'r' B := alpha*B*op( A ). - - Unchanged on exit. - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix A is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n' op( A ) = A. - - TRANSA = 'T' or 't' op( A ) = A'. - - TRANSA = 'C' or 'c' op( A ) = A'. - - Unchanged on exit. - - DIAG - CHARACTER*1. 
- On entry, DIAG specifies whether or not A is unit triangular - as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of B. M must be at - least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of B. N must be - at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. When alpha is - zero then A is not referenced and B need not be set before - entry. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m - when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. - Before entry with UPLO = 'U' or 'u', the leading k by k - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading k by k - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When SIDE = 'L' or 'l' then - LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' - then LDA must be at least max( 1, n ). - Unchanged on exit. - - B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). - Before entry, the leading m by n part of the array B must - contain the matrix B, and on exit is overwritten by the - transformed matrix. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. 
- - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - lside = lsame_(side, "L"); - if (lside) { - nrowa = *m; - } else { - nrowa = *n; - } - nounit = lsame_(diag, "N"); - upper = lsame_(uplo, "U"); - - info = 0; - if (! lside && ! lsame_(side, "R")) { - info = 1; - } else if (! upper && ! lsame_(uplo, "L")) { - info = 2; - } else if (! lsame_(transa, "N") && ! lsame_(transa, - "T") && ! lsame_(transa, "C")) { - info = 3; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 4; - } else if (*m < 0) { - info = 5; - } else if (*n < 0) { - info = 6; - } else if (*lda < max(1,nrowa)) { - info = 9; - } else if (*ldb < max(1,*m)) { - info = 11; - } - if (info != 0) { - xerbla_("DTRMM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* Start the operations. */ - - if (lside) { - if (lsame_(transa, "N")) { - -/* Form B := alpha*A*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (k = 1; k <= i__2; ++k) { - if (b[k + j * b_dim1] != 0.) 
{ - temp = *alpha * b[k + j * b_dim1]; - i__3 = k - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * a[i__ + k * - a_dim1]; -/* L30: */ - } - if (nounit) { - temp *= a[k + k * a_dim1]; - } - b[k + j * b_dim1] = temp; - } -/* L40: */ - } -/* L50: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (k = *m; k >= 1; --k) { - if (b[k + j * b_dim1] != 0.) { - temp = *alpha * b[k + j * b_dim1]; - b[k + j * b_dim1] = temp; - if (nounit) { - b[k + j * b_dim1] *= a[k + k * a_dim1]; - } - i__2 = *m; - for (i__ = k + 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * a[i__ + k * - a_dim1]; -/* L60: */ - } - } -/* L70: */ - } -/* L80: */ - } - } - } else { - -/* Form B := alpha*A'*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (i__ = *m; i__ >= 1; --i__) { - temp = b[i__ + j * b_dim1]; - if (nounit) { - temp *= a[i__ + i__ * a_dim1]; - } - i__2 = i__ - 1; - for (k = 1; k <= i__2; ++k) { - temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L90: */ - } - b[i__ + j * b_dim1] = *alpha * temp; -/* L100: */ - } -/* L110: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = b[i__ + j * b_dim1]; - if (nounit) { - temp *= a[i__ + i__ * a_dim1]; - } - i__3 = *m; - for (k = i__ + 1; k <= i__3; ++k) { - temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L120: */ - } - b[i__ + j * b_dim1] = *alpha * temp; -/* L130: */ - } -/* L140: */ - } - } - } - } else { - if (lsame_(transa, "N")) { - -/* Form B := alpha*B*A. */ - - if (upper) { - for (j = *n; j >= 1; --j) { - temp = *alpha; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L150: */ - } - i__1 = j - 1; - for (k = 1; k <= i__1; ++k) { - if (a[k + j * a_dim1] != 0.) 
{ - temp = *alpha * a[k + j * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = *alpha; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L190: */ - } - i__2 = *n; - for (k = j + 1; k <= i__2; ++k) { - if (a[k + j * a_dim1] != 0.) { - temp = *alpha * a[k + j * a_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L200: */ - } - } -/* L210: */ - } -/* L220: */ - } - } - } else { - -/* Form B := alpha*B*A'. */ - - if (upper) { - i__1 = *n; - for (k = 1; k <= i__1; ++k) { - i__2 = k - 1; - for (j = 1; j <= i__2; ++j) { - if (a[j + k * a_dim1] != 0.) { - temp = *alpha * a[j + k * a_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L230: */ - } - } -/* L240: */ - } - temp = *alpha; - if (nounit) { - temp *= a[k + k * a_dim1]; - } - if (temp != 1.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L250: */ - } - } -/* L260: */ - } - } else { - for (k = *n; k >= 1; --k) { - i__1 = *n; - for (j = k + 1; j <= i__1; ++j) { - if (a[j + k * a_dim1] != 0.) { - temp = *alpha * a[j + k * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L270: */ - } - } -/* L280: */ - } - temp = *alpha; - if (nounit) { - temp *= a[k + k * a_dim1]; - } - if (temp != 1.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L290: */ - } - } -/* L300: */ - } - } - } - } - - return 0; - -/* End of DTRMM . 
*/ - -} /* dtrmm_ */ - -/* Subroutine */ int dtrmv_(char *uplo, char *trans, char *diag, integer *n, - doublereal *a, integer *lda, doublereal *x, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, jx, kx, info; - static doublereal temp; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; - - -/* - Purpose - ======= - - DTRMV performs one of the matrix-vector operations - - x := A*x, or x := A'*x, - - where x is an n element vector and A is an n by n unit, or non-unit, - upper or lower triangular matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' x := A*x. - - TRANS = 'T' or 't' x := A'*x. - - TRANS = 'C' or 'c' x := A'*x. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit - triangular as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. 
- Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. On exit, X is overwritten with the - tranformed vector x. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*lda < max(1,*n)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } - if (info != 0) { - xerbla_("DTRMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - - nounit = lsame_(diag, "N"); - -/* - Set up the start point in X if the increment is not unity. This - will be ( N - 1 )*INCX too small for descending loops. 
-*/ - - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (lsame_(trans, "N")) { - -/* Form x := A*x. */ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[j] != 0.) { - temp = x[j]; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - x[i__] += temp * a[i__ + j * a_dim1]; -/* L10: */ - } - if (nounit) { - x[j] *= a[j + j * a_dim1]; - } - } -/* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.) { - temp = x[jx]; - ix = kx; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - x[ix] += temp * a[i__ + j * a_dim1]; - ix += *incx; -/* L30: */ - } - if (nounit) { - x[jx] *= a[j + j * a_dim1]; - } - } - jx += *incx; -/* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - if (x[j] != 0.) { - temp = x[j]; - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - x[i__] += temp * a[i__ + j * a_dim1]; -/* L50: */ - } - if (nounit) { - x[j] *= a[j + j * a_dim1]; - } - } -/* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - if (x[jx] != 0.) { - temp = x[jx]; - ix = kx; - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - x[ix] += temp * a[i__ + j * a_dim1]; - ix -= *incx; -/* L70: */ - } - if (nounit) { - x[jx] *= a[j + j * a_dim1]; - } - } - jx -= *incx; -/* L80: */ - } - } - } - } else { - -/* Form x := A'*x. 
*/ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - temp = x[j]; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - for (i__ = j - 1; i__ >= 1; --i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - x[j] = temp; -/* L100: */ - } - } else { - jx = kx + (*n - 1) * *incx; - for (j = *n; j >= 1; --j) { - temp = x[jx]; - ix = jx; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - temp += a[i__ + j * a_dim1] * x[ix]; -/* L110: */ - } - x[jx] = temp; - jx -= *incx; -/* L120: */ - } - } - } else { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = x[j]; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L130: */ - } - x[j] = temp; -/* L140: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = x[jx]; - ix = jx; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - temp += a[i__ + j * a_dim1] * x[ix]; -/* L150: */ - } - x[jx] = temp; - jx += *incx; -/* L160: */ - } - } - } - } - - return 0; - -/* End of DTRMV . 
*/ - -} /* dtrmv_ */ - -/* Subroutine */ int dtrsm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, doublereal *alpha, doublereal *a, integer * - lda, doublereal *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, k, info; - static doublereal temp; - static logical lside; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; - - -/* - Purpose - ======= - - DTRSM solves one of the matrix equations - - op( A )*X = alpha*B, or X*op( A ) = alpha*B, - - where alpha is a scalar, X and B are m by n matrices, A is a unit, or - non-unit, upper or lower triangular matrix and op( A ) is one of - - op( A ) = A or op( A ) = A'. - - The matrix X is overwritten on B. - - Parameters - ========== - - SIDE - CHARACTER*1. - On entry, SIDE specifies whether op( A ) appears on the left - or right of X as follows: - - SIDE = 'L' or 'l' op( A )*X = alpha*B. - - SIDE = 'R' or 'r' X*op( A ) = alpha*B. - - Unchanged on exit. - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix A is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n' op( A ) = A. - - TRANSA = 'T' or 't' op( A ) = A'. - - TRANSA = 'C' or 'c' op( A ) = A'. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit triangular - as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - M - INTEGER. 
- On entry, M specifies the number of rows of B. M must be at - least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of B. N must be - at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. When alpha is - zero then A is not referenced and B need not be set before - entry. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m - when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. - Before entry with UPLO = 'U' or 'u', the leading k by k - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading k by k - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When SIDE = 'L' or 'l' then - LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' - then LDA must be at least max( 1, n ). - Unchanged on exit. - - B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). - Before entry, the leading m by n part of the array B must - contain the right-hand side matrix B, and on exit is - overwritten by the solution matrix X. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - lside = lsame_(side, "L"); - if (lside) { - nrowa = *m; - } else { - nrowa = *n; - } - nounit = lsame_(diag, "N"); - upper = lsame_(uplo, "U"); - - info = 0; - if (! lside && ! lsame_(side, "R")) { - info = 1; - } else if (! upper && ! lsame_(uplo, "L")) { - info = 2; - } else if (! lsame_(transa, "N") && ! lsame_(transa, - "T") && ! lsame_(transa, "C")) { - info = 3; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 4; - } else if (*m < 0) { - info = 5; - } else if (*n < 0) { - info = 6; - } else if (*lda < max(1,nrowa)) { - info = 9; - } else if (*ldb < max(1,*m)) { - info = 11; - } - if (info != 0) { - xerbla_("DTRSM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* Start the operations. */ - - if (lside) { - if (lsame_(transa, "N")) { - -/* Form B := alpha*inv( A )*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L30: */ - } - } - for (k = *m; k >= 1; --k) { - if (b[k + j * b_dim1] != 0.) { - if (nounit) { - b[k + j * b_dim1] /= a[k + k * a_dim1]; - } - i__2 = k - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ - i__ + k * a_dim1]; -/* L40: */ - } - } -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.) 
{ - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L70: */ - } - } - i__2 = *m; - for (k = 1; k <= i__2; ++k) { - if (b[k + j * b_dim1] != 0.) { - if (nounit) { - b[k + j * b_dim1] /= a[k + k * a_dim1]; - } - i__3 = *m; - for (i__ = k + 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ - i__ + k * a_dim1]; -/* L80: */ - } - } -/* L90: */ - } -/* L100: */ - } - } - } else { - -/* Form B := alpha*inv( A' )*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = *alpha * b[i__ + j * b_dim1]; - i__3 = i__ - 1; - for (k = 1; k <= i__3; ++k) { - temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L110: */ - } - if (nounit) { - temp /= a[i__ + i__ * a_dim1]; - } - b[i__ + j * b_dim1] = temp; -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (i__ = *m; i__ >= 1; --i__) { - temp = *alpha * b[i__ + j * b_dim1]; - i__2 = *m; - for (k = i__ + 1; k <= i__2; ++k) { - temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L140: */ - } - if (nounit) { - temp /= a[i__ + i__ * a_dim1]; - } - b[i__ + j * b_dim1] = temp; -/* L150: */ - } -/* L160: */ - } - } - } - } else { - if (lsame_(transa, "N")) { - -/* Form B := alpha*B*inv( A ). */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L170: */ - } - } - i__2 = j - 1; - for (k = 1; k <= i__2; ++k) { - if (a[k + j * a_dim1] != 0.) { - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ - i__ + k * b_dim1]; -/* L180: */ - } - } -/* L190: */ - } - if (nounit) { - temp = 1. 
/ a[j + j * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L200: */ - } - } -/* L210: */ - } - } else { - for (j = *n; j >= 1; --j) { - if (*alpha != 1.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L220: */ - } - } - i__1 = *n; - for (k = j + 1; k <= i__1; ++k) { - if (a[k + j * a_dim1] != 0.) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ - i__ + k * b_dim1]; -/* L230: */ - } - } -/* L240: */ - } - if (nounit) { - temp = 1. / a[j + j * a_dim1]; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L250: */ - } - } -/* L260: */ - } - } - } else { - -/* Form B := alpha*B*inv( A' ). */ - - if (upper) { - for (k = *n; k >= 1; --k) { - if (nounit) { - temp = 1. / a[k + k * a_dim1]; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L270: */ - } - } - i__1 = k - 1; - for (j = 1; j <= i__1; ++j) { - if (a[j + k * a_dim1] != 0.) { - temp = a[j + k * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= temp * b[i__ + k * - b_dim1]; -/* L280: */ - } - } -/* L290: */ - } - if (*alpha != 1.) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] - ; -/* L300: */ - } - } -/* L310: */ - } - } else { - i__1 = *n; - for (k = 1; k <= i__1; ++k) { - if (nounit) { - temp = 1. / a[k + k * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L320: */ - } - } - i__2 = *n; - for (j = k + 1; j <= i__2; ++j) { - if (a[j + k * a_dim1] != 0.) { - temp = a[j + k * a_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= temp * b[i__ + k * - b_dim1]; -/* L330: */ - } - } -/* L340: */ - } - if (*alpha != 1.) 
{ - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] - ; -/* L350: */ - } - } -/* L360: */ - } - } - } - } - - return 0; - -/* End of DTRSM . */ - -} /* dtrsm_ */ - -doublereal dzasum_(integer *n, doublecomplex *zx, integer *incx) -{ - /* System generated locals */ - integer i__1; - doublereal ret_val; - - /* Local variables */ - static integer i__, ix; - static doublereal stemp; - extern doublereal dcabs1_(doublecomplex *); - - -/* - takes the sum of the absolute values. - jack dongarra, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --zx; - - /* Function Body */ - ret_val = 0.; - stemp = 0.; - if ((*n <= 0) || (*incx <= 0)) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - ix = 1; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp += dcabs1_(&zx[ix]); - ix += *incx; -/* L10: */ - } - ret_val = stemp; - return ret_val; - -/* code for increment equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp += dcabs1_(&zx[i__]); -/* L30: */ - } - ret_val = stemp; - return ret_val; -} /* dzasum_ */ - -doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - doublereal ret_val, d__1; - - /* Builtin functions */ - double d_imag(doublecomplex *), sqrt(doublereal); - - /* Local variables */ - static integer ix; - static doublereal ssq, temp, norm, scale; - - -/* - DZNRM2 returns the euclidean norm of a vector via the function - name, so that - - DZNRM2 := sqrt( conjg( x' )*x ) - - - -- This version written on 25-October-1982. - Modified on 14-October-1993 to inline the call to ZLASSQ. - Sven Hammarling, Nag Ltd. 
-*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if ((*n < 1) || (*incx < 1)) { - norm = 0.; - } else { - scale = 0.; - ssq = 1.; -/* - The following loop is equivalent to this call to the LAPACK - auxiliary routine: - CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) -*/ - - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - i__3 = ix; - if (x[i__3].r != 0.) { - i__3 = ix; - temp = (d__1 = x[i__3].r, abs(d__1)); - if (scale < temp) { -/* Computing 2nd power */ - d__1 = scale / temp; - ssq = ssq * (d__1 * d__1) + 1.; - scale = temp; - } else { -/* Computing 2nd power */ - d__1 = temp / scale; - ssq += d__1 * d__1; - } - } - if (d_imag(&x[ix]) != 0.) { - temp = (d__1 = d_imag(&x[ix]), abs(d__1)); - if (scale < temp) { -/* Computing 2nd power */ - d__1 = scale / temp; - ssq = ssq * (d__1 * d__1) + 1.; - scale = temp; - } else { -/* Computing 2nd power */ - d__1 = temp / scale; - ssq += d__1 * d__1; - } - } -/* L10: */ - } - norm = scale * sqrt(ssq); - } - - ret_val = norm; - return ret_val; - -/* End of DZNRM2. */ - -} /* dznrm2_ */ - -integer icamax_(integer *n, complex *cx, integer *incx) -{ - /* System generated locals */ - integer ret_val, i__1, i__2; - real r__1, r__2; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer i__, ix; - static real smax; - - -/* - finds the index of element having max. absolute value. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. 
- modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cx; - - /* Function Body */ - ret_val = 0; - if ((*n < 1) || (*incx <= 0)) { - return ret_val; - } - ret_val = 1; - if (*n == 1) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - ix = 1; - smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2)); - ix += *incx; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - i__2 = ix; - if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), dabs( - r__2)) <= smax) { - goto L5; - } - ret_val = i__; - i__2 = ix; - smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), - dabs(r__2)); -L5: - ix += *incx; -/* L10: */ - } - return ret_val; - -/* code for increment equal to 1 */ - -L20: - smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2)); - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - i__2 = i__; - if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), dabs( - r__2)) <= smax) { - goto L30; - } - ret_val = i__; - i__2 = i__; - smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), - dabs(r__2)); -L30: - ; - } - return ret_val; -} /* icamax_ */ - -integer idamax_(integer *n, doublereal *dx, integer *incx) -{ - /* System generated locals */ - integer ret_val, i__1; - doublereal d__1; - - /* Local variables */ - static integer i__, ix; - static doublereal dmax__; - - -/* - finds the index of element having max. absolute value. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. 
- modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --dx; - - /* Function Body */ - ret_val = 0; - if ((*n < 1) || (*incx <= 0)) { - return ret_val; - } - ret_val = 1; - if (*n == 1) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - ix = 1; - dmax__ = abs(dx[1]); - ix += *incx; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if ((d__1 = dx[ix], abs(d__1)) <= dmax__) { - goto L5; - } - ret_val = i__; - dmax__ = (d__1 = dx[ix], abs(d__1)); -L5: - ix += *incx; -/* L10: */ - } - return ret_val; - -/* code for increment equal to 1 */ - -L20: - dmax__ = abs(dx[1]); - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if ((d__1 = dx[i__], abs(d__1)) <= dmax__) { - goto L30; - } - ret_val = i__; - dmax__ = (d__1 = dx[i__], abs(d__1)); -L30: - ; - } - return ret_val; -} /* idamax_ */ - -integer isamax_(integer *n, real *sx, integer *incx) -{ - /* System generated locals */ - integer ret_val, i__1; - real r__1; - - /* Local variables */ - static integer i__, ix; - static real smax; - - -/* - finds the index of element having max. absolute value. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. 
- modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sx; - - /* Function Body */ - ret_val = 0; - if ((*n < 1) || (*incx <= 0)) { - return ret_val; - } - ret_val = 1; - if (*n == 1) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - ix = 1; - smax = dabs(sx[1]); - ix += *incx; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if ((r__1 = sx[ix], dabs(r__1)) <= smax) { - goto L5; - } - ret_val = i__; - smax = (r__1 = sx[ix], dabs(r__1)); -L5: - ix += *incx; -/* L10: */ - } - return ret_val; - -/* code for increment equal to 1 */ - -L20: - smax = dabs(sx[1]); - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if ((r__1 = sx[i__], dabs(r__1)) <= smax) { - goto L30; - } - ret_val = i__; - smax = (r__1 = sx[i__], dabs(r__1)); -L30: - ; - } - return ret_val; -} /* isamax_ */ - -integer izamax_(integer *n, doublecomplex *zx, integer *incx) -{ - /* System generated locals */ - integer ret_val, i__1; - - /* Local variables */ - static integer i__, ix; - static doublereal smax; - extern doublereal dcabs1_(doublecomplex *); - - -/* - finds the index of element having max. absolute value. - jack dongarra, 1/15/85. - modified 3/93 to return if incx .le. 0. 
- modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --zx; - - /* Function Body */ - ret_val = 0; - if ((*n < 1) || (*incx <= 0)) { - return ret_val; - } - ret_val = 1; - if (*n == 1) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - ix = 1; - smax = dcabs1_(&zx[1]); - ix += *incx; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if (dcabs1_(&zx[ix]) <= smax) { - goto L5; - } - ret_val = i__; - smax = dcabs1_(&zx[ix]); -L5: - ix += *incx; -/* L10: */ - } - return ret_val; - -/* code for increment equal to 1 */ - -L20: - smax = dcabs1_(&zx[1]); - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if (dcabs1_(&zx[i__]) <= smax) { - goto L30; - } - ret_val = i__; - smax = dcabs1_(&zx[i__]); -L30: - ; - } - return ret_val; -} /* izamax_ */ - -logical lsame_(char *ca, char *cb) -{ - /* System generated locals */ - logical ret_val; - - /* Local variables */ - static integer inta, intb, zcode; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - LSAME returns .TRUE. if CA is the same letter as CB regardless of - case. - - Arguments - ========= - - CA (input) CHARACTER*1 - CB (input) CHARACTER*1 - CA and CB specify the single characters to be compared. - - ===================================================================== - - - Test if the characters are equal -*/ - - ret_val = *(unsigned char *)ca == *(unsigned char *)cb; - if (ret_val) { - return ret_val; - } - -/* Now test for equivalence if both characters are alphabetic. */ - - zcode = 'Z'; - -/* - Use 'Z' rather than 'A' so that ASCII can be detected on Prime - machines, on which ICHAR returns a value with bit 8 set. - ICHAR('A') on Prime machines returns 193 which is the same as - ICHAR('A') on an EBCDIC machine. 
-*/ - - inta = *(unsigned char *)ca; - intb = *(unsigned char *)cb; - - if ((zcode == 90) || (zcode == 122)) { - -/* - ASCII is assumed - ZCODE is the ASCII code of either lower or - upper case 'Z'. -*/ - - if (inta >= 97 && inta <= 122) { - inta += -32; - } - if (intb >= 97 && intb <= 122) { - intb += -32; - } - - } else if ((zcode == 233) || (zcode == 169)) { - -/* - EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or - upper case 'Z'. -*/ - - if (((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153)) || - (inta >= 162 && inta <= 169)) { - inta += 64; - } - if (((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153)) || - (intb >= 162 && intb <= 169)) { - intb += 64; - } - - } else if ((zcode == 218) || (zcode == 250)) { - -/* - ASCII is assumed, on Prime machines - ZCODE is the ASCII code - plus 128 of either lower or upper case 'Z'. -*/ - - if (inta >= 225 && inta <= 250) { - inta += -32; - } - if (intb >= 225 && intb <= 250) { - intb += -32; - } - } - ret_val = inta == intb; - -/* - RETURN - - End of LSAME -*/ - - return ret_val; -} /* lsame_ */ - -/* Subroutine */ int saxpy_(integer *n, real *sa, real *sx, integer *incx, - real *sy, integer *incy) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - - -/* - constant times a vector plus a vector. - uses unrolled loop for increments equal to one. - jack dongarra, linpack, 3/11/78. 
- modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sy; - --sx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*sa == 0.f) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - sy[iy] += *sa * sx[ix]; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 4; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - sy[i__] += *sa * sx[i__]; -/* L30: */ - } - if (*n < 4) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 4) { - sy[i__] += *sa * sx[i__]; - sy[i__ + 1] += *sa * sx[i__ + 1]; - sy[i__ + 2] += *sa * sx[i__ + 2]; - sy[i__ + 3] += *sa * sx[i__ + 3]; -/* L50: */ - } - return 0; -} /* saxpy_ */ - -doublereal scasum_(integer *n, complex *cx, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - real ret_val, r__1, r__2; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer i__, nincx; - static real stemp; - - -/* - takes the sum of the absolute values of a complex vector and - returns a single precision result. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --cx; - - /* Function Body */ - ret_val = 0.f; - stemp = 0.f; - if ((*n <= 0) || (*incx <= 0)) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - nincx = *n * *incx; - i__1 = nincx; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { - i__3 = i__; - stemp = stemp + (r__1 = cx[i__3].r, dabs(r__1)) + (r__2 = r_imag(&cx[ - i__]), dabs(r__2)); -/* L10: */ - } - ret_val = stemp; - return ret_val; - -/* code for increment equal to 1 */ - -L20: - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__1 = i__; - stemp = stemp + (r__1 = cx[i__1].r, dabs(r__1)) + (r__2 = r_imag(&cx[ - i__]), dabs(r__2)); -/* L30: */ - } - ret_val = stemp; - return ret_val; -} /* scasum_ */ - -doublereal scnrm2_(integer *n, complex *x, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - real ret_val, r__1; - - /* Builtin functions */ - double r_imag(complex *), sqrt(doublereal); - - /* Local variables */ - static integer ix; - static real ssq, temp, norm, scale; - - -/* - SCNRM2 returns the euclidean norm of a vector via the function - name, so that - - SCNRM2 := sqrt( conjg( x' )*x ) - - - -- This version written on 25-October-1982. - Modified on 14-October-1993 to inline the call to CLASSQ. - Sven Hammarling, Nag Ltd. -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if ((*n < 1) || (*incx < 1)) { - norm = 0.f; - } else { - scale = 0.f; - ssq = 1.f; -/* - The following loop is equivalent to this call to the LAPACK - auxiliary routine: - CALL CLASSQ( N, X, INCX, SCALE, SSQ ) -*/ - - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? 
ix >= i__1 : ix <= i__1; ix += i__2) { - i__3 = ix; - if (x[i__3].r != 0.f) { - i__3 = ix; - temp = (r__1 = x[i__3].r, dabs(r__1)); - if (scale < temp) { -/* Computing 2nd power */ - r__1 = scale / temp; - ssq = ssq * (r__1 * r__1) + 1.f; - scale = temp; - } else { -/* Computing 2nd power */ - r__1 = temp / scale; - ssq += r__1 * r__1; - } - } - if (r_imag(&x[ix]) != 0.f) { - temp = (r__1 = r_imag(&x[ix]), dabs(r__1)); - if (scale < temp) { -/* Computing 2nd power */ - r__1 = scale / temp; - ssq = ssq * (r__1 * r__1) + 1.f; - scale = temp; - } else { -/* Computing 2nd power */ - r__1 = temp / scale; - ssq += r__1 * r__1; - } - } -/* L10: */ - } - norm = scale * sqrt(ssq); - } - - ret_val = norm; - return ret_val; - -/* End of SCNRM2. */ - -} /* scnrm2_ */ - -/* Subroutine */ int scopy_(integer *n, real *sx, integer *incx, real *sy, - integer *incy) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - - -/* - copies a vector, x, to a vector, y. - uses unrolled loops for increments equal to 1. - jack dongarra, linpack, 3/11/78. 
- modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sy; - --sx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - sy[iy] = sx[ix]; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 7; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - sy[i__] = sx[i__]; -/* L30: */ - } - if (*n < 7) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 7) { - sy[i__] = sx[i__]; - sy[i__ + 1] = sx[i__ + 1]; - sy[i__ + 2] = sx[i__ + 2]; - sy[i__ + 3] = sx[i__ + 3]; - sy[i__ + 4] = sx[i__ + 4]; - sy[i__ + 5] = sx[i__ + 5]; - sy[i__ + 6] = sx[i__ + 6]; -/* L50: */ - } - return 0; -} /* scopy_ */ - -doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) -{ - /* System generated locals */ - integer i__1; - real ret_val; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - static real stemp; - - -/* - forms the dot product of two vectors. - uses unrolled loops for increments equal to one. - jack dongarra, linpack, 3/11/78. 
- modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sy; - --sx; - - /* Function Body */ - stemp = 0.f; - ret_val = 0.f; - if (*n <= 0) { - return ret_val; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments - not equal to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp += sx[ix] * sy[iy]; - ix += *incx; - iy += *incy; -/* L10: */ - } - ret_val = stemp; - return ret_val; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 5; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp += sx[i__] * sy[i__]; -/* L30: */ - } - if (*n < 5) { - goto L60; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 5) { - stemp = stemp + sx[i__] * sy[i__] + sx[i__ + 1] * sy[i__ + 1] + sx[ - i__ + 2] * sy[i__ + 2] + sx[i__ + 3] * sy[i__ + 3] + sx[i__ + - 4] * sy[i__ + 4]; -/* L50: */ - } -L60: - ret_val = stemp; - return ret_val; -} /* sdot_ */ - -/* Subroutine */ int sgemm_(char *transa, char *transb, integer *m, integer * - n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * - ldb, real *beta, real *c__, integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3; - - /* Local variables */ - static integer i__, j, l, info; - static logical nota, notb; - static real temp; - static integer ncola; - extern logical lsame_(char *, char *); - static integer nrowa, nrowb; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SGEMM performs one of the matrix-matrix operations - - C := alpha*op( A )*op( B ) + beta*C, - - where op( X ) is one of - - op( X ) = X or op( X ) = X', - - alpha and beta are scalars, and A, B and C 
are matrices, with op( A ) - an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - - Parameters - ========== - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n', op( A ) = A. - - TRANSA = 'T' or 't', op( A ) = A'. - - TRANSA = 'C' or 'c', op( A ) = A'. - - Unchanged on exit. - - TRANSB - CHARACTER*1. - On entry, TRANSB specifies the form of op( B ) to be used in - the matrix multiplication as follows: - - TRANSB = 'N' or 'n', op( B ) = B. - - TRANSB = 'T' or 't', op( B ) = B'. - - TRANSB = 'C' or 'c', op( B ) = B'. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of the matrix - op( A ) and of the matrix C. M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix - op( B ) and the number of columns of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry, K specifies the number of columns of the matrix - op( A ) and the number of rows of the matrix op( B ). K must - be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, ka ), where ka is - k when TRANSA = 'N' or 'n', and is m otherwise. - Before entry with TRANSA = 'N' or 'n', the leading m by k - part of the array A must contain the matrix A, otherwise - the leading k by m part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANSA = 'N' or 'n' then - LDA must be at least max( 1, m ), otherwise LDA must be at - least max( 1, k ). - Unchanged on exit. - - B - REAL array of DIMENSION ( LDB, kb ), where kb is - n when TRANSB = 'N' or 'n', and is k otherwise. 
- Before entry with TRANSB = 'N' or 'n', the leading k by n - part of the array B must contain the matrix B, otherwise - the leading n by k part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. When TRANSB = 'N' or 'n' then - LDB must be at least max( 1, k ), otherwise LDB must be at - least max( 1, n ). - Unchanged on exit. - - BETA - REAL . - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then C need not be set on input. - Unchanged on exit. - - C - REAL array of DIMENSION ( LDC, n ). - Before entry, the leading m by n part of the array C must - contain the matrix C, except when beta is zero, in which - case C need not be set on entry. - On exit, the array C is overwritten by the m by n matrix - ( alpha*op( A )*op( B ) + beta*C ). - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Set NOTA and NOTB as true if A and B respectively are not - transposed and set NROWA, NCOLA and NROWB as the number of rows - and columns of A and the number of rows of B respectively. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - nota = lsame_(transa, "N"); - notb = lsame_(transb, "N"); - if (nota) { - nrowa = *m; - ncola = *k; - } else { - nrowa = *k; - ncola = *m; - } - if (notb) { - nrowb = *k; - } else { - nrowb = *n; - } - -/* Test the input parameters. */ - - info = 0; - if (! 
nota && ! lsame_(transa, "C") && ! lsame_( - transa, "T")) { - info = 1; - } else if (! notb && ! lsame_(transb, "C") && ! - lsame_(transb, "T")) { - info = 2; - } else if (*m < 0) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*k < 0) { - info = 5; - } else if (*lda < max(1,nrowa)) { - info = 8; - } else if (*ldb < max(1,nrowb)) { - info = 10; - } else if (*ldc < max(1,*m)) { - info = 13; - } - if (info != 0) { - xerbla_("SGEMM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (((*m == 0) || (*n == 0)) || (((*alpha == 0.f) || (*k == 0)) && *beta - == 1.f)) { - return 0; - } - -/* And if alpha.eq.zero. */ - - if (*alpha == 0.f) { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L30: */ - } -/* L40: */ - } - } - return 0; - } - -/* Start the operations. */ - - if (notb) { - if (nota) { - -/* Form C := alpha*A*B + beta*C. 
*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L50: */ - } - } else if (*beta != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L60: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (b[l + j * b_dim1] != 0.f) { - temp = *alpha * b[l + j * b_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L70: */ - } - } -/* L80: */ - } -/* L90: */ - } - } else { - -/* Form C := alpha*A'*B + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; -/* L100: */ - } - if (*beta == 0.f) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L110: */ - } -/* L120: */ - } - } - } else { - if (nota) { - -/* Form C := alpha*A*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L130: */ - } - } else if (*beta != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L140: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (b[j + l * b_dim1] != 0.f) { - temp = *alpha * b[j + l * b_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L150: */ - } - } -/* L160: */ - } -/* L170: */ - } - } else { - -/* Form C := alpha*A'*B' + beta*C */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; 
-/* L180: */ - } - if (*beta == 0.f) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L190: */ - } -/* L200: */ - } - } - } - - return 0; - -/* End of SGEMM . */ - -} /* sgemm_ */ - -/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, - real *a, integer *lda, real *x, integer *incx, real *beta, real *y, - integer *incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static real temp; - static integer lenx, leny; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SGEMV performs one of the matrix-vector operations - - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, - - where alpha and beta are scalars, x and y are vectors and A is an - m by n matrix. - - Parameters - ========== - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' y := alpha*A*x + beta*y. - - TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - - TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. 
- - X - REAL array of DIMENSION at least - ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. - Before entry, the incremented array X must contain the - vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - BETA - REAL . - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - - Y - REAL array of DIMENSION at least - ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' - and at least - ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. - Before entry with BETA non-zero, the incremented array Y - must contain the vector y. On exit, Y is overwritten by the - updated vector y. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") - ) { - info = 1; - } else if (*m < 0) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*lda < max(1,*m)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } else if (*incy == 0) { - info = 11; - } - if (info != 0) { - xerbla_("SGEMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (((*m == 0) || (*n == 0)) || (*alpha == 0.f && *beta == 1.f)) { - return 0; - } - -/* - Set LENX and LENY, the lengths of the vectors x and y, and set - up the start points in X and Y. 
-*/ - - if (lsame_(trans, "N")) { - lenx = *n; - leny = *m; - } else { - lenx = *m; - leny = *n; - } - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (lenx - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (leny - 1) * *incy; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. - - First form y := beta*y. -*/ - - if (*beta != 1.f) { - if (*incy == 1) { - if (*beta == 0.f) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.f; -/* L10: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.f) { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.f; - iy += *incy; -/* L30: */ - } - } else { - i__1 = leny; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.f) { - return 0; - } - if (lsame_(trans, "N")) { - -/* Form y := alpha*A*x + y. */ - - jx = kx; - if (*incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.f) { - temp = *alpha * x[jx]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp * a[i__ + j * a_dim1]; -/* L50: */ - } - } - jx += *incx; -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.f) { - temp = *alpha * x[jx]; - iy = ky; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - y[iy] += temp * a[i__ + j * a_dim1]; - iy += *incy; -/* L70: */ - } - } - jx += *incx; -/* L80: */ - } - } - } else { - -/* Form y := alpha*A'*x + y. 
*/ - - jy = ky; - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = 0.f; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - y[jy] += *alpha * temp; - jy += *incy; -/* L100: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = 0.f; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[ix]; - ix += *incx; -/* L110: */ - } - y[jy] += *alpha * temp; - jy += *incy; -/* L120: */ - } - } - } - - return 0; - -/* End of SGEMV . */ - -} /* sgemv_ */ - -/* Subroutine */ int sger_(integer *m, integer *n, real *alpha, real *x, - integer *incx, real *y, integer *incy, real *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, jy, kx, info; - static real temp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SGER performs the rank 1 operation - - A := alpha*x*y' + A, - - where alpha is a scalar, x is an m element vector, y is an n element - vector and A is an m by n matrix. - - Parameters - ========== - - M - INTEGER. - On entry, M specifies the number of rows of the matrix A. - M must be at least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - REAL array of dimension at least - ( 1 + ( m - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the m - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - REAL array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. 
- - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, n ). - Before entry, the leading m by n part of the array A must - contain the matrix of coefficients. On exit, A is - overwritten by the updated matrix. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, m ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (*m < 0) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("SGER ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (((*m == 0) || (*n == 0)) || (*alpha == 0.f)) { - return 0; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. 
-*/ - - if (*incy > 0) { - jy = 1; - } else { - jy = 1 - (*n - 1) * *incy; - } - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.f) { - temp = *alpha * y[jy]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[i__] * temp; -/* L10: */ - } - } - jy += *incy; -/* L20: */ - } - } else { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*m - 1) * *incx; - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.f) { - temp = *alpha * y[jy]; - ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[ix] * temp; - ix += *incx; -/* L30: */ - } - } - jy += *incy; -/* L40: */ - } - } - - return 0; - -/* End of SGER . */ - -} /* sger_ */ - -doublereal snrm2_(integer *n, real *x, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2; - real ret_val, r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer ix; - static real ssq, norm, scale, absxi; - - -/* - SNRM2 returns the euclidean norm of a vector via the function - name, so that - - SNRM2 := sqrt( x'*x ) - - - -- This version written on 25-October-1982. - Modified on 14-October-1993 to inline the call to SLASSQ. - Sven Hammarling, Nag Ltd. -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if ((*n < 1) || (*incx < 1)) { - norm = 0.f; - } else if (*n == 1) { - norm = dabs(x[1]); - } else { - scale = 0.f; - ssq = 1.f; -/* - The following loop is equivalent to this call to the LAPACK - auxiliary routine: - CALL SLASSQ( N, X, INCX, SCALE, SSQ ) -*/ - - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? 
ix >= i__1 : ix <= i__1; ix += i__2) { - if (x[ix] != 0.f) { - absxi = (r__1 = x[ix], dabs(r__1)); - if (scale < absxi) { -/* Computing 2nd power */ - r__1 = scale / absxi; - ssq = ssq * (r__1 * r__1) + 1.f; - scale = absxi; - } else { -/* Computing 2nd power */ - r__1 = absxi / scale; - ssq += r__1 * r__1; - } - } -/* L10: */ - } - norm = scale * sqrt(ssq); - } - - ret_val = norm; - return ret_val; - -/* End of SNRM2. */ - -} /* snrm2_ */ - -/* Subroutine */ int srot_(integer *n, real *sx, integer *incx, real *sy, - integer *incy, real *c__, real *s) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, ix, iy; - static real stemp; - - -/* - applies a plane rotation. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sy; - --sx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp = *c__ * sx[ix] + *s * sy[iy]; - sy[iy] = *c__ * sy[iy] - *s * sx[ix]; - sx[ix] = stemp; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp = *c__ * sx[i__] + *s * sy[i__]; - sy[i__] = *c__ * sy[i__] - *s * sx[i__]; - sx[i__] = stemp; -/* L30: */ - } - return 0; -} /* srot_ */ - -/* Subroutine */ int sscal_(integer *n, real *sa, real *sx, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2; - - /* Local variables */ - static integer i__, m, mp1, nincx; - - -/* - scales a vector by a constant. - uses unrolled loops for increment equal to 1. - jack dongarra, linpack, 3/11/78. 
- modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sx; - - /* Function Body */ - if ((*n <= 0) || (*incx <= 0)) { - return 0; - } - if (*incx == 1) { - goto L20; - } - -/* code for increment not equal to 1 */ - - nincx = *n * *incx; - i__1 = nincx; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - sx[i__] = *sa * sx[i__]; -/* L10: */ - } - return 0; - -/* - code for increment equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 5; - if (m == 0) { - goto L40; - } - i__2 = m; - for (i__ = 1; i__ <= i__2; ++i__) { - sx[i__] = *sa * sx[i__]; -/* L30: */ - } - if (*n < 5) { - return 0; - } -L40: - mp1 = m + 1; - i__2 = *n; - for (i__ = mp1; i__ <= i__2; i__ += 5) { - sx[i__] = *sa * sx[i__]; - sx[i__ + 1] = *sa * sx[i__ + 1]; - sx[i__ + 2] = *sa * sx[i__ + 2]; - sx[i__ + 3] = *sa * sx[i__ + 3]; - sx[i__ + 4] = *sa * sx[i__ + 4]; -/* L50: */ - } - return 0; -} /* sscal_ */ - -/* Subroutine */ int sswap_(integer *n, real *sx, integer *incx, real *sy, - integer *incy) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, m, ix, iy, mp1; - static real stemp; - - -/* - interchanges two vectors. - uses unrolled loops for increments equal to 1. - jack dongarra, linpack, 3/11/78. 
- modified 12/3/93, array(1) declarations changed to array(*) -*/ - - - /* Parameter adjustments */ - --sy; - --sx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp = sx[ix]; - sx[ix] = sy[iy]; - sy[iy] = stemp; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 3; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp = sx[i__]; - sx[i__] = sy[i__]; - sy[i__] = stemp; -/* L30: */ - } - if (*n < 3) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 3) { - stemp = sx[i__]; - sx[i__] = sy[i__]; - sy[i__] = stemp; - stemp = sx[i__ + 1]; - sx[i__ + 1] = sy[i__ + 1]; - sy[i__ + 1] = stemp; - stemp = sx[i__ + 2]; - sx[i__ + 2] = sy[i__ + 2]; - sy[i__ + 2] = stemp; -/* L50: */ - } - return 0; -} /* sswap_ */ - -/* Subroutine */ int ssymv_(char *uplo, integer *n, real *alpha, real *a, - integer *lda, real *x, integer *incx, real *beta, real *y, integer * - incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static real temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SSYMV performs the matrix-vector operation - - y := alpha*A*x + beta*y, - - where alpha and beta are scalars, x and y are n element vectors and - A is an n by n symmetric matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. 
- On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of A is not referenced. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - REAL array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - BETA - REAL . - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - - Y - REAL array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. On exit, Y is overwritten by the updated - vector y. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. 
- - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --y; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*lda < max(1,*n)) { - info = 5; - } else if (*incx == 0) { - info = 7; - } else if (*incy == 0) { - info = 10; - } - if (info != 0) { - xerbla_("SSYMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if ((*n == 0) || (*alpha == 0.f && *beta == 1.f)) { - return 0; - } - -/* Set up the start points in X and Y. */ - - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through the triangular part - of A. - - First form y := beta*y. -*/ - - if (*beta != 1.f) { - if (*incy == 1) { - if (*beta == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.f; -/* L10: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; -/* L20: */ - } - } - } else { - iy = ky; - if (*beta == 0.f) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.f; - iy += *incy; -/* L30: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; - iy += *incy; -/* L40: */ - } - } - } - } - if (*alpha == 0.f) { - return 0; - } - if (lsame_(uplo, "U")) { - -/* Form y when A is stored in upper triangle. 
*/ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.f; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[i__]; -/* L50: */ - } - y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; -/* L60: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.f; - ix = kx; - iy = ky; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - y[iy] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[ix]; - ix += *incx; - iy += *incy; -/* L70: */ - } - y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; - jx += *incx; - jy += *incy; -/* L80: */ - } - } - } else { - -/* Form y when A is stored in lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.f; - y[j] += temp1 * a[j + j * a_dim1]; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - y[j] += *alpha * temp2; -/* L100: */ - } - } else { - jx = kx; - jy = ky; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.f; - y[jy] += temp1 * a[j + j * a_dim1]; - ix = jx; - iy = jy; - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - iy += *incy; - y[iy] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[ix]; -/* L110: */ - } - y[jy] += *alpha * temp2; - jx += *incx; - jy += *incy; -/* L120: */ - } - } - } - - return 0; - -/* End of SSYMV . 
*/ - -} /* ssymv_ */ - -/* Subroutine */ int ssyr2_(char *uplo, integer *n, real *alpha, real *x, - integer *incx, real *y, integer *incy, real *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static real temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SSYR2 performs the symmetric rank 2 operation - - A := alpha*x*y' + alpha*y*x' + A, - - where alpha is a scalar, x and y are n element vectors and A is an n - by n symmetric matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - X - REAL array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - Y - REAL array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. - Unchanged on exit. - - INCY - INTEGER. - On entry, INCY specifies the increment for the elements of - Y. INCY must not be zero. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, n ). 
- Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of A is not referenced. On exit, the - upper triangular part of the array A is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of A is not referenced. On exit, the - lower triangular part of the array A is overwritten by the - lower triangular part of the updated matrix. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --x; - --y; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (*n < 0) { - info = 2; - } else if (*incx == 0) { - info = 5; - } else if (*incy == 0) { - info = 7; - } else if (*lda < max(1,*n)) { - info = 9; - } - if (info != 0) { - xerbla_("SSYR2 ", &info); - return 0; - } - -/* Quick return if possible. */ - - if ((*n == 0) || (*alpha == 0.f)) { - return 0; - } - -/* - Set up the start points in X and Y if the increments are not both - unity. -*/ - - if ((*incx != 1) || (*incy != 1)) { - if (*incx > 0) { - kx = 1; - } else { - kx = 1 - (*n - 1) * *incx; - } - if (*incy > 0) { - ky = 1; - } else { - ky = 1 - (*n - 1) * *incy; - } - jx = kx; - jy = ky; - } - -/* - Start the operations. 
In this version the elements of A are - accessed sequentially with one pass through the triangular part - of A. -*/ - - if (lsame_(uplo, "U")) { - -/* Form A when A is stored in the upper triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if ((x[j] != 0.f) || (y[j] != 0.f)) { - temp1 = *alpha * y[j]; - temp2 = *alpha * x[j]; - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * - temp1 + y[i__] * temp2; -/* L10: */ - } - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if ((x[jx] != 0.f) || (y[jy] != 0.f)) { - temp1 = *alpha * y[jy]; - temp2 = *alpha * x[jx]; - ix = kx; - iy = ky; - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * - temp1 + y[iy] * temp2; - ix += *incx; - iy += *incy; -/* L30: */ - } - } - jx += *incx; - jy += *incy; -/* L40: */ - } - } - } else { - -/* Form A when A is stored in the lower triangle. */ - - if (*incx == 1 && *incy == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if ((x[j] != 0.f) || (y[j] != 0.f)) { - temp1 = *alpha * y[j]; - temp2 = *alpha * x[j]; - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * - temp1 + y[i__] * temp2; -/* L50: */ - } - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if ((x[jx] != 0.f) || (y[jy] != 0.f)) { - temp1 = *alpha * y[jy]; - temp2 = *alpha * x[jx]; - ix = jx; - iy = jy; - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * - temp1 + y[iy] * temp2; - ix += *incx; - iy += *incy; -/* L70: */ - } - } - jx += *incx; - jy += *incy; -/* L80: */ - } - } - } - - return 0; - -/* End of SSYR2 . 
*/ - -} /* ssyr2_ */ - -/* Subroutine */ int ssyr2k_(char *uplo, char *trans, integer *n, integer *k, - real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, - real *c__, integer *ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3; - - /* Local variables */ - static integer i__, j, l, info; - static real temp1, temp2; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SSYR2K performs one of the symmetric rank 2k operations - - C := alpha*A*B' + alpha*B*A' + beta*C, - - or - - C := alpha*A'*B + alpha*B'*A + beta*C, - - where alpha and beta are scalars, C is an n by n symmetric matrix - and A and B are n by k matrices in the first case and k by n - matrices in the second case. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array C is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of C - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of C - is to be referenced. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + - beta*C. - - TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + - beta*C. - - TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + - beta*C. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry with TRANS = 'N' or 'n', K specifies the number - of columns of the matrices A and B, and on entry with - TRANS = 'T' or 't' or 'C' or 'c', K specifies the number - of rows of the matrices A and B. K must be at least zero. - Unchanged on exit. - - ALPHA - REAL . 
- On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, ka ), where ka is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array A must contain the matrix A, otherwise - the leading k by n part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDA must be at least max( 1, n ), otherwise LDA must - be at least max( 1, k ). - Unchanged on exit. - - B - REAL array of DIMENSION ( LDB, kb ), where kb is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array B must contain the matrix B, otherwise - the leading k by n part of the array B must contain the - matrix B. - Unchanged on exit. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDB must be at least max( 1, n ), otherwise LDB must - be at least max( 1, k ). - Unchanged on exit. - - BETA - REAL . - On entry, BETA specifies the scalar beta. - Unchanged on exit. - - C - REAL array of DIMENSION ( LDC, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array C must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of C is not referenced. On exit, the - upper triangular part of the array C is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array C must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of C is not referenced. 
On exit, the - lower triangular part of the array C is overwritten by the - lower triangular part of the updated matrix. - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, n ). - Unchanged on exit. - - - Level 3 Blas routine. - - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - if (lsame_(trans, "N")) { - nrowa = *n; - } else { - nrowa = *k; - } - upper = lsame_(uplo, "U"); - - info = 0; - if (! upper && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*k < 0) { - info = 4; - } else if (*lda < max(1,nrowa)) { - info = 7; - } else if (*ldb < max(1,nrowa)) { - info = 9; - } else if (*ldc < max(1,*n)) { - info = 12; - } - if (info != 0) { - xerbla_("SSYR2K", &info); - return 0; - } - -/* Quick return if possible. */ - - if ((*n == 0) || (((*alpha == 0.f) || (*k == 0)) && *beta == 1.f)) { - return 0; - } - -/* And when alpha.eq.zero. 
*/ - - if (*alpha == 0.f) { - if (upper) { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L30: */ - } -/* L40: */ - } - } - } else { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L70: */ - } -/* L80: */ - } - } - } - return 0; - } - -/* Start the operations. */ - - if (lsame_(trans, "N")) { - -/* Form C := alpha*A*B' + alpha*B*A' + C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L90: */ - } - } else if (*beta != 1.f) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L100: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if ((a[j + l * a_dim1] != 0.f) || (b[j + l * b_dim1] != - 0.f)) { - temp1 = *alpha * b[j + l * b_dim1]; - temp2 = *alpha * a[j + l * a_dim1]; - i__3 = j; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ - i__ + l * a_dim1] * temp1 + b[i__ + l * - b_dim1] * temp2; -/* L110: */ - } - } -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L140: */ - } - } else if (*beta != 1.f) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * 
c_dim1]; -/* L150: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if ((a[j + l * a_dim1] != 0.f) || (b[j + l * b_dim1] != - 0.f)) { - temp1 = *alpha * b[j + l * b_dim1]; - temp2 = *alpha * a[j + l * a_dim1]; - i__3 = *n; - for (i__ = j; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ - i__ + l * a_dim1] * temp1 + b[i__ + l * - b_dim1] * temp2; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } - } else { - -/* Form C := alpha*A'*B + alpha*B'*A + C. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - temp1 = 0.f; - temp2 = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; - temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; -/* L190: */ - } - if (*beta == 0.f) { - c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * - temp2; - } else { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] - + *alpha * temp1 + *alpha * temp2; - } -/* L200: */ - } -/* L210: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - temp1 = 0.f; - temp2 = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; - temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; -/* L220: */ - } - if (*beta == 0.f) { - c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * - temp2; - } else { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] - + *alpha * temp1 + *alpha * temp2; - } -/* L230: */ - } -/* L240: */ - } - } - } - - return 0; - -/* End of SSYR2K. 
*/ - -} /* ssyr2k_ */ - -/* Subroutine */ int ssyrk_(char *uplo, char *trans, integer *n, integer *k, - real *alpha, real *a, integer *lda, real *beta, real *c__, integer * - ldc) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, l, info; - static real temp; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - SSYRK performs one of the symmetric rank k operations - - C := alpha*A*A' + beta*C, - - or - - C := alpha*A'*A + beta*C, - - where alpha and beta are scalars, C is an n by n symmetric matrix - and A is an n by k matrix in the first case and a k by n matrix - in the second case. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array C is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of C - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of C - is to be referenced. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. - - TRANS = 'T' or 't' C := alpha*A'*A + beta*C. - - TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix C. N must be - at least zero. - Unchanged on exit. - - K - INTEGER. - On entry with TRANS = 'N' or 'n', K specifies the number - of columns of the matrix A, and on entry with - TRANS = 'T' or 't' or 'C' or 'c', K specifies the number - of rows of the matrix A. K must be at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. 
- - A - REAL array of DIMENSION ( LDA, ka ), where ka is - k when TRANS = 'N' or 'n', and is n otherwise. - Before entry with TRANS = 'N' or 'n', the leading n by k - part of the array A must contain the matrix A, otherwise - the leading k by n part of the array A must contain the - matrix A. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When TRANS = 'N' or 'n' - then LDA must be at least max( 1, n ), otherwise LDA must - be at least max( 1, k ). - Unchanged on exit. - - BETA - REAL . - On entry, BETA specifies the scalar beta. - Unchanged on exit. - - C - REAL array of DIMENSION ( LDC, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array C must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of C is not referenced. On exit, the - upper triangular part of the array C is overwritten by the - upper triangular part of the updated matrix. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array C must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of C is not referenced. On exit, the - lower triangular part of the array C is overwritten by the - lower triangular part of the updated matrix. - - LDC - INTEGER. - On entry, LDC specifies the first dimension of C as declared - in the calling (sub) program. LDC must be at least - max( 1, n ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - - /* Function Body */ - if (lsame_(trans, "N")) { - nrowa = *n; - } else { - nrowa = *k; - } - upper = lsame_(uplo, "U"); - - info = 0; - if (! upper && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (*n < 0) { - info = 3; - } else if (*k < 0) { - info = 4; - } else if (*lda < max(1,nrowa)) { - info = 7; - } else if (*ldc < max(1,*n)) { - info = 10; - } - if (info != 0) { - xerbla_("SSYRK ", &info); - return 0; - } - -/* Quick return if possible. */ - - if ((*n == 0) || (((*alpha == 0.f) || (*k == 0)) && *beta == 1.f)) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.f) { - if (upper) { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L30: */ - } -/* L40: */ - } - } - } else { - if (*beta == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L70: */ - } -/* L80: */ - } - } - } - return 0; - } - -/* Start the operations. */ - - if (lsame_(trans, "N")) { - -/* Form C := alpha*A*A' + beta*C. 
*/ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L90: */ - } - } else if (*beta != 1.f) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L100: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0.f) { - temp = *alpha * a[j + l * a_dim1]; - i__3 = j; - for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L110: */ - } - } -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*beta == 0.f) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.f; -/* L140: */ - } - } else if (*beta != 1.f) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L150: */ - } - } - i__2 = *k; - for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0.f) { - temp = *alpha * a[j + l * a_dim1]; - i__3 = *n; - for (i__ = j; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } - } else { - -/* Form C := alpha*A'*A + beta*C. 
*/ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; -/* L190: */ - } - if (*beta == 0.f) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L200: */ - } -/* L210: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - temp = 0.f; - i__3 = *k; - for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; -/* L220: */ - } - if (*beta == 0.f) { - c__[i__ + j * c_dim1] = *alpha * temp; - } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; - } -/* L230: */ - } -/* L240: */ - } - } - } - - return 0; - -/* End of SSYRK . */ - -} /* ssyrk_ */ - -/* Subroutine */ int strmm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, - integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, k, info; - static real temp; - static logical lside; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; - - -/* - Purpose - ======= - - STRMM performs one of the matrix-matrix operations - - B := alpha*op( A )*B, or B := alpha*B*op( A ), - - where alpha is a scalar, B is an m by n matrix, A is a unit, or - non-unit, upper or lower triangular matrix and op( A ) is one of - - op( A ) = A or op( A ) = A'. - - Parameters - ========== - - SIDE - CHARACTER*1. - On entry, SIDE specifies whether op( A ) multiplies B from - the left or right as follows: - - SIDE = 'L' or 'l' B := alpha*op( A )*B. - - SIDE = 'R' or 'r' B := alpha*B*op( A ). 
- - Unchanged on exit. - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix A is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n' op( A ) = A. - - TRANSA = 'T' or 't' op( A ) = A'. - - TRANSA = 'C' or 'c' op( A ) = A'. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit triangular - as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - M - INTEGER. - On entry, M specifies the number of rows of B. M must be at - least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of B. N must be - at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. When alpha is - zero then A is not referenced and B need not be set before - entry. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, k ), where k is m - when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. - Before entry with UPLO = 'U' or 'u', the leading k by k - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading k by k - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. 
When SIDE = 'L' or 'l' then - LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' - then LDA must be at least max( 1, n ). - Unchanged on exit. - - B - REAL array of DIMENSION ( LDB, n ). - Before entry, the leading m by n part of the array B must - contain the matrix B, and on exit is overwritten by the - transformed matrix. - - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least - max( 1, m ). - Unchanged on exit. - - - Level 3 Blas routine. - - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - lside = lsame_(side, "L"); - if (lside) { - nrowa = *m; - } else { - nrowa = *n; - } - nounit = lsame_(diag, "N"); - upper = lsame_(uplo, "U"); - - info = 0; - if (! lside && ! lsame_(side, "R")) { - info = 1; - } else if (! upper && ! lsame_(uplo, "L")) { - info = 2; - } else if (! lsame_(transa, "N") && ! lsame_(transa, - "T") && ! lsame_(transa, "C")) { - info = 3; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 4; - } else if (*m < 0) { - info = 5; - } else if (*n < 0) { - info = 6; - } else if (*lda < max(1,nrowa)) { - info = 9; - } else if (*ldb < max(1,*m)) { - info = 11; - } - if (info != 0) { - xerbla_("STRMM ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* And when alpha.eq.zero. */ - - if (*alpha == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = 0.f; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* Start the operations. 
*/ - - if (lside) { - if (lsame_(transa, "N")) { - -/* Form B := alpha*A*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (k = 1; k <= i__2; ++k) { - if (b[k + j * b_dim1] != 0.f) { - temp = *alpha * b[k + j * b_dim1]; - i__3 = k - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * a[i__ + k * - a_dim1]; -/* L30: */ - } - if (nounit) { - temp *= a[k + k * a_dim1]; - } - b[k + j * b_dim1] = temp; - } -/* L40: */ - } -/* L50: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (k = *m; k >= 1; --k) { - if (b[k + j * b_dim1] != 0.f) { - temp = *alpha * b[k + j * b_dim1]; - b[k + j * b_dim1] = temp; - if (nounit) { - b[k + j * b_dim1] *= a[k + k * a_dim1]; - } - i__2 = *m; - for (i__ = k + 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * a[i__ + k * - a_dim1]; -/* L60: */ - } - } -/* L70: */ - } -/* L80: */ - } - } - } else { - -/* Form B := alpha*A'*B. */ - - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (i__ = *m; i__ >= 1; --i__) { - temp = b[i__ + j * b_dim1]; - if (nounit) { - temp *= a[i__ + i__ * a_dim1]; - } - i__2 = i__ - 1; - for (k = 1; k <= i__2; ++k) { - temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L90: */ - } - b[i__ + j * b_dim1] = *alpha * temp; -/* L100: */ - } -/* L110: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = b[i__ + j * b_dim1]; - if (nounit) { - temp *= a[i__ + i__ * a_dim1]; - } - i__3 = *m; - for (k = i__ + 1; k <= i__3; ++k) { - temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L120: */ - } - b[i__ + j * b_dim1] = *alpha * temp; -/* L130: */ - } -/* L140: */ - } - } - } - } else { - if (lsame_(transa, "N")) { - -/* Form B := alpha*B*A. 
*/ - - if (upper) { - for (j = *n; j >= 1; --j) { - temp = *alpha; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L150: */ - } - i__1 = j - 1; - for (k = 1; k <= i__1; ++k) { - if (a[k + j * a_dim1] != 0.f) { - temp = *alpha * a[k + j * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L160: */ - } - } -/* L170: */ - } -/* L180: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = *alpha; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L190: */ - } - i__2 = *n; - for (k = j + 1; k <= i__2; ++k) { - if (a[k + j * a_dim1] != 0.f) { - temp = *alpha * a[k + j * a_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L200: */ - } - } -/* L210: */ - } -/* L220: */ - } - } - } else { - -/* Form B := alpha*B*A'. 
*/ - - if (upper) { - i__1 = *n; - for (k = 1; k <= i__1; ++k) { - i__2 = k - 1; - for (j = 1; j <= i__2; ++j) { - if (a[j + k * a_dim1] != 0.f) { - temp = *alpha * a[j + k * a_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L230: */ - } - } -/* L240: */ - } - temp = *alpha; - if (nounit) { - temp *= a[k + k * a_dim1]; - } - if (temp != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L250: */ - } - } -/* L260: */ - } - } else { - for (k = *n; k >= 1; --k) { - i__1 = *n; - for (j = k + 1; j <= i__1; ++j) { - if (a[j + k * a_dim1] != 0.f) { - temp = *alpha * a[j + k * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L270: */ - } - } -/* L280: */ - } - temp = *alpha; - if (nounit) { - temp *= a[k + k * a_dim1]; - } - if (temp != 1.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L290: */ - } - } -/* L300: */ - } - } - } - } - - return 0; - -/* End of STRMM . */ - -} /* strmm_ */ - -/* Subroutine */ int strmv_(char *uplo, char *trans, char *diag, integer *n, - real *a, integer *lda, real *x, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, jx, kx, info; - static real temp; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; - - -/* - Purpose - ======= - - STRMV performs one of the matrix-vector operations - - x := A*x, or x := A'*x, - - where x is an n element vector and A is an n by n unit, or non-unit, - upper or lower triangular matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. 
- - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANS - CHARACTER*1. - On entry, TRANS specifies the operation to be performed as - follows: - - TRANS = 'N' or 'n' x := A*x. - - TRANS = 'T' or 't' x := A'*x. - - TRANS = 'C' or 'c' x := A'*x. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit - triangular as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - REAL array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). - Before entry, the incremented array X must contain the n - element vector x. On exit, X is overwritten with the - tranformed vector x. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - - Level 2 Blas routine. - - -- Written on 22-October-1986. - Jack Dongarra, Argonne National Lab. - Jeremy Du Croz, Nag Central Office. - Sven Hammarling, Nag Central Office. - Richard Hanson, Sandia National Labs. 
- - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - - /* Function Body */ - info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - info = 1; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { - info = 2; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 3; - } else if (*n < 0) { - info = 4; - } else if (*lda < max(1,*n)) { - info = 6; - } else if (*incx == 0) { - info = 8; - } - if (info != 0) { - xerbla_("STRMV ", &info); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - - nounit = lsame_(diag, "N"); - -/* - Set up the start point in X if the increment is not unity. This - will be ( N - 1 )*INCX too small for descending loops. -*/ - - if (*incx <= 0) { - kx = 1 - (*n - 1) * *incx; - } else if (*incx != 1) { - kx = 1; - } - -/* - Start the operations. In this version the elements of A are - accessed sequentially with one pass through A. -*/ - - if (lsame_(trans, "N")) { - -/* Form x := A*x. 
*/ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[j] != 0.f) { - temp = x[j]; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - x[i__] += temp * a[i__ + j * a_dim1]; -/* L10: */ - } - if (nounit) { - x[j] *= a[j + j * a_dim1]; - } - } -/* L20: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.f) { - temp = x[jx]; - ix = kx; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - x[ix] += temp * a[i__ + j * a_dim1]; - ix += *incx; -/* L30: */ - } - if (nounit) { - x[jx] *= a[j + j * a_dim1]; - } - } - jx += *incx; -/* L40: */ - } - } - } else { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - if (x[j] != 0.f) { - temp = x[j]; - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - x[i__] += temp * a[i__ + j * a_dim1]; -/* L50: */ - } - if (nounit) { - x[j] *= a[j + j * a_dim1]; - } - } -/* L60: */ - } - } else { - kx += (*n - 1) * *incx; - jx = kx; - for (j = *n; j >= 1; --j) { - if (x[jx] != 0.f) { - temp = x[jx]; - ix = kx; - i__1 = j + 1; - for (i__ = *n; i__ >= i__1; --i__) { - x[ix] += temp * a[i__ + j * a_dim1]; - ix -= *incx; -/* L70: */ - } - if (nounit) { - x[jx] *= a[j + j * a_dim1]; - } - } - jx -= *incx; -/* L80: */ - } - } - } - } else { - -/* Form x := A'*x. 
*/ - - if (lsame_(uplo, "U")) { - if (*incx == 1) { - for (j = *n; j >= 1; --j) { - temp = x[j]; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - for (i__ = j - 1; i__ >= 1; --i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L90: */ - } - x[j] = temp; -/* L100: */ - } - } else { - jx = kx + (*n - 1) * *incx; - for (j = *n; j >= 1; --j) { - temp = x[jx]; - ix = jx; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - temp += a[i__ + j * a_dim1] * x[ix]; -/* L110: */ - } - x[jx] = temp; - jx -= *incx; -/* L120: */ - } - } - } else { - if (*incx == 1) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = x[j]; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L130: */ - } - x[j] = temp; -/* L140: */ - } - } else { - jx = kx; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - temp = x[jx]; - ix = jx; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - temp += a[i__ + j * a_dim1] * x[ix]; -/* L150: */ - } - x[jx] = temp; - jx += *incx; -/* L160: */ - } - } +L20: + smax = dcabs1_(&zx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[i__]) <= smax) { + goto L30; } + ret_val = i__; + smax = dcabs1_(&zx[i__]); +L30: + ; } + return ret_val; +} /* izamax_ */ - return 0; - -/* End of STRMV . 
*/ - -} /* strmv_ */ - -/* Subroutine */ int strsm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, - integer *ldb) +logical lsame_(char *ca, char *cb) { /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + logical ret_val; /* Local variables */ - static integer i__, j, k, info; - static real temp; - static logical lside; - extern logical lsame_(char *, char *); - static integer nrowa; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; + static integer inta, intb, zcode; /* - Purpose - ======= - - STRSM solves one of the matrix equations - - op( A )*X = alpha*B, or X*op( A ) = alpha*B, - - where alpha is a scalar, X and B are m by n matrices, A is a unit, or - non-unit, upper or lower triangular matrix and op( A ) is one of - - op( A ) = A or op( A ) = A'. - - The matrix X is overwritten on B. - - Parameters - ========== - - SIDE - CHARACTER*1. - On entry, SIDE specifies whether op( A ) appears on the left - or right of X as follows: - - SIDE = 'L' or 'l' op( A )*X = alpha*B. - - SIDE = 'R' or 'r' X*op( A ) = alpha*B. - - Unchanged on exit. - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the matrix A is an upper or - lower triangular matrix as follows: - - UPLO = 'U' or 'u' A is an upper triangular matrix. - - UPLO = 'L' or 'l' A is a lower triangular matrix. - - Unchanged on exit. - - TRANSA - CHARACTER*1. - On entry, TRANSA specifies the form of op( A ) to be used in - the matrix multiplication as follows: - - TRANSA = 'N' or 'n' op( A ) = A. - - TRANSA = 'T' or 't' op( A ) = A'. - - TRANSA = 'C' or 'c' op( A ) = A'. - - Unchanged on exit. - - DIAG - CHARACTER*1. - On entry, DIAG specifies whether or not A is unit triangular - as follows: - - DIAG = 'U' or 'u' A is assumed to be unit triangular. - - DIAG = 'N' or 'n' A is not assumed to be unit - triangular. - - Unchanged on exit. 
- - M - INTEGER. - On entry, M specifies the number of rows of B. M must be at - least zero. - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the number of columns of B. N must be - at least zero. - Unchanged on exit. - - ALPHA - REAL . - On entry, ALPHA specifies the scalar alpha. When alpha is - zero then A is not referenced and B need not be set before - entry. - Unchanged on exit. - - A - REAL array of DIMENSION ( LDA, k ), where k is m - when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. - Before entry with UPLO = 'U' or 'u', the leading k by k - upper triangular part of the array A must contain the upper - triangular matrix and the strictly lower triangular part of - A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading k by k - lower triangular part of the array A must contain the lower - triangular matrix and the strictly upper triangular part of - A is not referenced. - Note that when DIAG = 'U' or 'u', the diagonal elements of - A are not referenced either, but are assumed to be unity. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. When SIDE = 'L' or 'l' then - LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' - then LDA must be at least max( 1, n ). - Unchanged on exit. + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 - B - REAL array of DIMENSION ( LDB, n ). - Before entry, the leading m by n part of the array B must - contain the right-hand side matrix B, and on exit is - overwritten by the solution matrix X. - LDB - INTEGER. - On entry, LDB specifies the first dimension of B as declared - in the calling (sub) program. LDB must be at least - max( 1, m ). - Unchanged on exit. + Purpose + ======= + LSAME returns .TRUE. if CA is the same letter as CB regardless of + case. 
- Level 3 Blas routine. + Arguments + ========= + CA (input) CHARACTER*1 + CB (input) CHARACTER*1 + CA and CB specify the single characters to be compared. - -- Written on 8-February-1989. - Jack Dongarra, Argonne National Laboratory. - Iain Duff, AERE Harwell. - Jeremy Du Croz, Numerical Algorithms Group Ltd. - Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== - Test the input parameters. + Test if the characters are equal */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - lside = lsame_(side, "L"); - if (lside) { - nrowa = *m; - } else { - nrowa = *n; - } - nounit = lsame_(diag, "N"); - upper = lsame_(uplo, "U"); - - info = 0; - if (! lside && ! lsame_(side, "R")) { - info = 1; - } else if (! upper && ! lsame_(uplo, "L")) { - info = 2; - } else if (! lsame_(transa, "N") && ! lsame_(transa, - "T") && ! lsame_(transa, "C")) { - info = 3; - } else if (! lsame_(diag, "U") && ! lsame_(diag, - "N")) { - info = 4; - } else if (*m < 0) { - info = 5; - } else if (*n < 0) { - info = 6; - } else if (*lda < max(1,nrowa)) { - info = 9; - } else if (*ldb < max(1,*m)) { - info = 11; - } - if (info != 0) { - xerbla_("STRSM ", &info); - return 0; + ret_val = *(unsigned char *)ca == *(unsigned char *)cb; + if (ret_val) { + return ret_val; } -/* Quick return if possible. */ +/* Now test for equivalence if both characters are alphabetic. */ - if (*n == 0) { - return 0; - } + zcode = 'Z'; -/* And when alpha.eq.zero. */ +/* + Use 'Z' rather than 'A' so that ASCII can be detected on Prime + machines, on which ICHAR returns a value with bit 8 set. + ICHAR('A') on Prime machines returns 193 which is the same as + ICHAR('A') on an EBCDIC machine. 
+*/ - if (*alpha == 0.f) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = 0.f; -/* L10: */ - } -/* L20: */ - } - return 0; - } + inta = *(unsigned char *)ca; + intb = *(unsigned char *)cb; -/* Start the operations. */ + if (zcode == 90 || zcode == 122) { - if (lside) { - if (lsame_(transa, "N")) { +/* + ASCII is assumed - ZCODE is the ASCII code of either lower or + upper case 'Z'. +*/ -/* Form B := alpha*inv( A )*B. */ + if (inta >= 97 && inta <= 122) { + inta += -32; + } + if (intb >= 97 && intb <= 122) { + intb += -32; + } - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L30: */ - } - } - for (k = *m; k >= 1; --k) { - if (b[k + j * b_dim1] != 0.f) { - if (nounit) { - b[k + j * b_dim1] /= a[k + k * a_dim1]; - } - i__2 = k - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ - i__ + k * a_dim1]; -/* L40: */ - } - } -/* L50: */ - } -/* L60: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L70: */ - } - } - i__2 = *m; - for (k = 1; k <= i__2; ++k) { - if (b[k + j * b_dim1] != 0.f) { - if (nounit) { - b[k + j * b_dim1] /= a[k + k * a_dim1]; - } - i__3 = *m; - for (i__ = k + 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ - i__ + k * a_dim1]; -/* L80: */ - } - } -/* L90: */ - } -/* L100: */ - } - } - } else { + } else if (zcode == 233 || zcode == 169) { -/* Form B := alpha*inv( A' )*B. */ +/* + EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or + upper case 'Z'. 
+*/ - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = *alpha * b[i__ + j * b_dim1]; - i__3 = i__ - 1; - for (k = 1; k <= i__3; ++k) { - temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L110: */ - } - if (nounit) { - temp /= a[i__ + i__ * a_dim1]; - } - b[i__ + j * b_dim1] = temp; -/* L120: */ - } -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - for (i__ = *m; i__ >= 1; --i__) { - temp = *alpha * b[i__ + j * b_dim1]; - i__2 = *m; - for (k = i__ + 1; k <= i__2; ++k) { - temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L140: */ - } - if (nounit) { - temp /= a[i__ + i__ * a_dim1]; - } - b[i__ + j * b_dim1] = temp; -/* L150: */ - } -/* L160: */ - } - } + if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta + >= 162 && inta <= 169) { + inta += 64; + } + if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb + >= 162 && intb <= 169) { + intb += 64; } - } else { - if (lsame_(transa, "N")) { - -/* Form B := alpha*B*inv( A ). 
*/ - if (upper) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L170: */ - } - } - i__2 = j - 1; - for (k = 1; k <= i__2; ++k) { - if (a[k + j * a_dim1] != 0.f) { - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ - i__ + k * b_dim1]; -/* L180: */ - } - } -/* L190: */ - } - if (nounit) { - temp = 1.f / a[j + j * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L200: */ - } - } -/* L210: */ - } - } else { - for (j = *n; j >= 1; --j) { - if (*alpha != 1.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L220: */ - } - } - i__1 = *n; - for (k = j + 1; k <= i__1; ++k) { - if (a[k + j * a_dim1] != 0.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ - i__ + k * b_dim1]; -/* L230: */ - } - } -/* L240: */ - } - if (nounit) { - temp = 1.f / a[j + j * a_dim1]; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L250: */ - } - } -/* L260: */ - } - } - } else { + } else if (zcode == 218 || zcode == 250) { -/* Form B := alpha*B*inv( A' ). */ +/* + ASCII is assumed, on Prime machines - ZCODE is the ASCII code + plus 128 of either lower or upper case 'Z'. 
+*/ - if (upper) { - for (k = *n; k >= 1; --k) { - if (nounit) { - temp = 1.f / a[k + k * a_dim1]; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L270: */ - } - } - i__1 = k - 1; - for (j = 1; j <= i__1; ++j) { - if (a[j + k * a_dim1] != 0.f) { - temp = a[j + k * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= temp * b[i__ + k * - b_dim1]; -/* L280: */ - } - } -/* L290: */ - } - if (*alpha != 1.f) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] - ; -/* L300: */ - } - } -/* L310: */ - } - } else { - i__1 = *n; - for (k = 1; k <= i__1; ++k) { - if (nounit) { - temp = 1.f / a[k + k * a_dim1]; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L320: */ - } - } - i__2 = *n; - for (j = k + 1; j <= i__2; ++j) { - if (a[j + k * a_dim1] != 0.f) { - temp = a[j + k * a_dim1]; - i__3 = *m; - for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= temp * b[i__ + k * - b_dim1]; -/* L330: */ - } - } -/* L340: */ - } - if (*alpha != 1.f) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] - ; -/* L350: */ - } - } -/* L360: */ - } - } + if (inta >= 225 && inta <= 250) { + inta += -32; + } + if (intb >= 225 && intb <= 250) { + intb += -32; } } + ret_val = inta == intb; - return 0; +/* + RETURN + + End of LSAME +*/ -/* End of STRSM . 
*/ + return ret_val; +} /* lsame_ */ -} /* strsm_ */ -#if 0 /* Subroutine */ int xerbla_(char *srname, integer *info) { /* Format strings */ @@ -14792,7 +4318,7 @@ L40: /* Subroutine */ int s_stop(char *, ftnlen); /* Fortran I/O blocks */ - static cilist io___425 = { 0, 6, 0, fmt_9999, 0 }; + static cilist io___147 = { 0, 6, 0, fmt_9999, 0 }; /* @@ -14824,7 +4350,7 @@ L40: */ - s_wsfe(&io___425); + s_wsfe(&io___147); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); @@ -14836,7 +4362,6 @@ L40: return 0; } /* xerbla_ */ -#endif /* Subroutine */ int zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy) @@ -15162,7 +4687,7 @@ L20: --zx; /* Function Body */ - if ((*n <= 0) || (*incx <= 0)) { + if (*n <= 0 || *incx <= 0) { return 0; } if (*incx == 1) { @@ -15409,8 +4934,8 @@ L20: /* Quick return if possible. */ - if (((*m == 0) || (*n == 0)) || (((alpha->r == 0. && alpha->i == 0.) || (* - k == 0)) && (beta->r == 1. && beta->i == 0.))) { + if (*m == 0 || *n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && + (beta->r == 1. && beta->i == 0.)) { return 0; } @@ -15463,7 +4988,7 @@ L20: c__[i__3].r = 0., c__[i__3].i = 0.; /* L50: */ } - } else if ((beta->r != 1.) || (beta->i != 0.)) { + } else if (beta->r != 1. || beta->i != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * c_dim1; @@ -15478,7 +5003,7 @@ L20: i__2 = *k; for (l = 1; l <= i__2; ++l) { i__3 = l + j * b_dim1; - if ((b[i__3].r != 0.) || (b[i__3].i != 0.)) { + if (b[i__3].r != 0. || b[i__3].i != 0.) { i__3 = l + j * b_dim1; z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, z__1.i = alpha->r * b[i__3].i + alpha->i * b[ @@ -15601,7 +5126,7 @@ L20: c__[i__3].r = 0., c__[i__3].i = 0.; /* L160: */ } - } else if ((beta->r != 1.) || (beta->i != 0.)) { + } else if (beta->r != 1. || beta->i != 0.) 
{ i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * c_dim1; @@ -15616,7 +5141,7 @@ L20: i__2 = *k; for (l = 1; l <= i__2; ++l) { i__3 = j + l * b_dim1; - if ((b[i__3].r != 0.) || (b[i__3].i != 0.)) { + if (b[i__3].r != 0. || b[i__3].i != 0.) { d_cnjg(&z__2, &b[j + l * b_dim1]); z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = alpha->r * z__2.i + alpha->i * @@ -15653,7 +5178,7 @@ L20: c__[i__3].r = 0., c__[i__3].i = 0.; /* L210: */ } - } else if ((beta->r != 1.) || (beta->i != 0.)) { + } else if (beta->r != 1. || beta->i != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * c_dim1; @@ -15668,7 +5193,7 @@ L20: i__2 = *k; for (l = 1; l <= i__2; ++l) { i__3 = j + l * b_dim1; - if ((b[i__3].r != 0.) || (b[i__3].i != 0.)) { + if (b[i__3].r != 0. || b[i__3].i != 0.) { i__3 = j + l * b_dim1; z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, z__1.i = alpha->r * b[i__3].i + alpha->i * b[ @@ -16020,8 +5545,8 @@ L20: /* Quick return if possible. */ - if (((*m == 0) || (*n == 0)) || (alpha->r == 0. && alpha->i == 0. && ( - beta->r == 1. && beta->i == 0.))) { + if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == + 1. && beta->i == 0.)) { return 0; } @@ -16057,7 +5582,7 @@ L20: First form y := beta*y. */ - if ((beta->r != 1.) || (beta->i != 0.)) { + if (beta->r != 1. || beta->i != 0.) { if (*incy == 1) { if (beta->r == 0. && beta->i == 0.) { i__1 = leny; @@ -16115,7 +5640,7 @@ L20: i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; - if ((x[i__2].r != 0.) || (x[i__2].i != 0.)) { + if (x[i__2].r != 0. || x[i__2].i != 0.) { i__2 = jx; z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] @@ -16142,7 +5667,7 @@ L20: i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; - if ((x[i__2].r != 0.) || (x[i__2].i != 0.)) { + if (x[i__2].r != 0. || x[i__2].i != 0.) 
{ i__2 = jx; z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] @@ -16379,7 +5904,7 @@ L20: /* Quick return if possible. */ - if (((*m == 0) || (*n == 0)) || (alpha->r == 0. && alpha->i == 0.)) { + if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0.) { return 0; } @@ -16397,7 +5922,7 @@ L20: i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jy; - if ((y[i__2].r != 0.) || (y[i__2].i != 0.)) { + if (y[i__2].r != 0. || y[i__2].i != 0.) { d_cnjg(&z__2, &y[jy]); z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = alpha->r * z__2.i + alpha->i * z__2.r; @@ -16426,7 +5951,7 @@ L20: i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jy; - if ((y[i__2].r != 0.) || (y[i__2].i != 0.)) { + if (y[i__2].r != 0. || y[i__2].i != 0.) { d_cnjg(&z__2, &y[jy]); z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = alpha->r * z__2.i + alpha->i * z__2.r; @@ -16571,7 +6096,7 @@ L20: /* Quick return if possible. */ - if (((*m == 0) || (*n == 0)) || (alpha->r == 0. && alpha->i == 0.)) { + if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0.) { return 0; } @@ -16589,7 +6114,7 @@ L20: i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jy; - if ((y[i__2].r != 0.) || (y[i__2].i != 0.)) { + if (y[i__2].r != 0. || y[i__2].i != 0.) { i__2 = jy; z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i = alpha->r * y[i__2].i + alpha->i * y[i__2].r; @@ -16618,7 +6143,7 @@ L20: i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jy; - if ((y[i__2].r != 0.) || (y[i__2].i != 0.)) { + if (y[i__2].r != 0. || y[i__2].i != 0.) { i__2 = jy; z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i = alpha->r * y[i__2].i + alpha->i * y[i__2].r; @@ -16789,8 +6314,8 @@ L20: /* Quick return if possible. */ - if ((*n == 0) || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && - beta->i == 0.))) { + if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. 
&& + beta->i == 0.)) { return 0; } @@ -16815,7 +6340,7 @@ L20: First form y := beta*y. */ - if ((beta->r != 1.) || (beta->i != 0.)) { + if (beta->r != 1. || beta->i != 0.) { if (*incy == 1) { if (beta->r == 0. && beta->i == 0.) { i__1 = *n; @@ -17196,7 +6721,7 @@ L20: /* Quick return if possible. */ - if ((*n == 0) || (alpha->r == 0. && alpha->i == 0.)) { + if (*n == 0 || alpha->r == 0. && alpha->i == 0.) { return 0; } @@ -17205,7 +6730,7 @@ L20: unity. */ - if ((*incx != 1) || (*incy != 1)) { + if (*incx != 1 || *incy != 1) { if (*incx > 0) { kx = 1; } else { @@ -17235,8 +6760,8 @@ L20: for (j = 1; j <= i__1; ++j) { i__2 = j; i__3 = j; - if (((x[i__2].r != 0.) || (x[i__2].i != 0.)) || (((y[i__3].r - != 0.) || (y[i__3].i != 0.)))) { + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || + y[i__3].i != 0.)) { d_cnjg(&z__2, &y[j]); z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = alpha->r * z__2.i + alpha->i * z__2.r; @@ -17291,8 +6816,8 @@ L20: for (j = 1; j <= i__1; ++j) { i__2 = jx; i__3 = jy; - if (((x[i__2].r != 0.) || (x[i__2].i != 0.)) || (((y[i__3].r - != 0.) || (y[i__3].i != 0.)))) { + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || + y[i__3].i != 0.)) { d_cnjg(&z__2, &y[jy]); z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = alpha->r * z__2.i + alpha->i * z__2.r; @@ -17358,8 +6883,8 @@ L20: for (j = 1; j <= i__1; ++j) { i__2 = j; i__3 = j; - if (((x[i__2].r != 0.) || (x[i__2].i != 0.)) || (((y[i__3].r - != 0.) || (y[i__3].i != 0.)))) { + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || + y[i__3].i != 0.)) { d_cnjg(&z__2, &y[j]); z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = alpha->r * z__2.i + alpha->i * z__2.r; @@ -17414,8 +6939,8 @@ L20: for (j = 1; j <= i__1; ++j) { i__2 = jx; i__3 = jy; - if (((x[i__2].r != 0.) || (x[i__2].i != 0.)) || (((y[i__3].r - != 0.) || (y[i__3].i != 0.)))) { + if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. 
|| + y[i__3].i != 0.)) { d_cnjg(&z__2, &y[jy]); z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = alpha->r * z__2.i + alpha->i * z__2.r; @@ -17680,8 +7205,8 @@ L20: /* Quick return if possible. */ - if ((*n == 0) || (((alpha->r == 0. && alpha->i == 0.) || (*k == 0)) && * - beta == 1.)) { + if (*n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && *beta == + 1.) { return 0; } @@ -17797,8 +7322,8 @@ L20: for (l = 1; l <= i__2; ++l) { i__3 = j + l * a_dim1; i__4 = j + l * b_dim1; - if (((a[i__3].r != 0.) || (a[i__3].i != 0.)) || (((b[i__4] - .r != 0.) || (b[i__4].i != 0.)))) { + if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != + 0. || b[i__4].i != 0.)) { d_cnjg(&z__2, &b[j + l * b_dim1]); z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = alpha->r * z__2.i + alpha->i * @@ -17881,8 +7406,8 @@ L20: for (l = 1; l <= i__2; ++l) { i__3 = j + l * a_dim1; i__4 = j + l * b_dim1; - if (((a[i__3].r != 0.) || (a[i__3].i != 0.)) || (((b[i__4] - .r != 0.) || (b[i__4].i != 0.)))) { + if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != + 0. || b[i__4].i != 0.)) { d_cnjg(&z__2, &b[j + l * b_dim1]); z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = alpha->r * z__2.i + alpha->i * @@ -18305,7 +7830,7 @@ L20: /* Quick return if possible. */ - if ((*n == 0) || (((*alpha == 0.) || (*k == 0)) && *beta == 1.)) { + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { return 0; } @@ -18417,7 +7942,7 @@ L20: i__2 = *k; for (l = 1; l <= i__2; ++l) { i__3 = j + l * a_dim1; - if ((a[i__3].r != 0.) || (a[i__3].i != 0.)) { + if (a[i__3].r != 0. || a[i__3].i != 0.) { d_cnjg(&z__2, &a[j + l * a_dim1]); z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; temp.r = z__1.r, temp.i = z__1.i; @@ -18480,7 +8005,7 @@ L20: i__2 = *k; for (l = 1; l <= i__2; ++l) { i__3 = j + l * a_dim1; - if ((a[i__3].r != 0.) || (a[i__3].i != 0.)) { + if (a[i__3].r != 0. || a[i__3].i != 0.) 
{ d_cnjg(&z__2, &a[j + l * a_dim1]); z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i; temp.r = z__1.r, temp.i = z__1.i; @@ -18657,7 +8182,7 @@ L20: --zx; /* Function Body */ - if ((*n <= 0) || (*incx <= 0)) { + if (*n <= 0 || *incx <= 0) { return 0; } if (*incx == 1) { @@ -18990,7 +8515,7 @@ L20: i__2 = *m; for (k = 1; k <= i__2; ++k) { i__3 = k + j * b_dim1; - if ((b[i__3].r != 0.) || (b[i__3].i != 0.)) { + if (b[i__3].r != 0. || b[i__3].i != 0.) { i__3 = k + j * b_dim1; z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] .i, z__1.i = alpha->r * b[i__3].i + @@ -19028,7 +8553,7 @@ L20: for (j = 1; j <= i__1; ++j) { for (k = *m; k >= 1; --k) { i__2 = k + j * b_dim1; - if ((b[i__2].r != 0.) || (b[i__2].i != 0.)) { + if (b[i__2].r != 0. || b[i__2].i != 0.) { i__2 = k + j * b_dim1; z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2] .i, z__1.i = alpha->r * b[i__2].i + @@ -19211,7 +8736,7 @@ L20: i__1 = j - 1; for (k = 1; k <= i__1; ++k) { i__2 = k + j * a_dim1; - if ((a[i__2].r != 0.) || (a[i__2].i != 0.)) { + if (a[i__2].r != 0. || a[i__2].i != 0.) { i__2 = k + j * a_dim1; z__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2] .i, z__1.i = alpha->r * a[i__2].i + @@ -19259,7 +8784,7 @@ L20: i__2 = *n; for (k = j + 1; k <= i__2; ++k) { i__3 = k + j * a_dim1; - if ((a[i__3].r != 0.) || (a[i__3].i != 0.)) { + if (a[i__3].r != 0. || a[i__3].i != 0.) { i__3 = k + j * a_dim1; z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3] .i, z__1.i = alpha->r * a[i__3].i + @@ -19294,7 +8819,7 @@ L20: i__2 = k - 1; for (j = 1; j <= i__2; ++j) { i__3 = j + k * a_dim1; - if ((a[i__3].r != 0.) || (a[i__3].i != 0.)) { + if (a[i__3].r != 0. || a[i__3].i != 0.) { if (noconj) { i__3 = j + k * a_dim1; z__1.r = alpha->r * a[i__3].r - alpha->i * a[ @@ -19340,7 +8865,7 @@ L20: temp.r = z__1.r, temp.i = z__1.i; } } - if ((temp.r != 1.) || (temp.i != 0.)) { + if (temp.r != 1. || temp.i != 0.) 
{ i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + k * b_dim1; @@ -19359,7 +8884,7 @@ L20: i__1 = *n; for (j = k + 1; j <= i__1; ++j) { i__2 = j + k * a_dim1; - if ((a[i__2].r != 0.) || (a[i__2].i != 0.)) { + if (a[i__2].r != 0. || a[i__2].i != 0.) { if (noconj) { i__2 = j + k * a_dim1; z__1.r = alpha->r * a[i__2].r - alpha->i * a[ @@ -19405,7 +8930,7 @@ L20: temp.r = z__1.r, temp.i = z__1.i; } } - if ((temp.r != 1.) || (temp.i != 0.)) { + if (temp.r != 1. || temp.i != 0.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__ + k * b_dim1; @@ -19604,7 +9129,7 @@ L20: i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - if ((x[i__2].r != 0.) || (x[i__2].i != 0.)) { + if (x[i__2].r != 0. || x[i__2].i != 0.) { i__2 = j; temp.r = x[i__2].r, temp.i = x[i__2].i; i__2 = j - 1; @@ -19637,7 +9162,7 @@ L20: i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; - if ((x[i__2].r != 0.) || (x[i__2].i != 0.)) { + if (x[i__2].r != 0. || x[i__2].i != 0.) { i__2 = jx; temp.r = x[i__2].r, temp.i = x[i__2].i; ix = kx; @@ -19673,7 +9198,7 @@ L20: if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; - if ((x[i__1].r != 0.) || (x[i__1].i != 0.)) { + if (x[i__1].r != 0. || x[i__1].i != 0.) { i__1 = j; temp.r = x[i__1].r, temp.i = x[i__1].i; i__1 = j + 1; @@ -19706,7 +9231,7 @@ L20: jx = kx; for (j = *n; j >= 1; --j) { i__1 = jx; - if ((x[i__1].r != 0.) || (x[i__1].i != 0.)) { + if (x[i__1].r != 0. || x[i__1].i != 0.) { i__1 = jx; temp.r = x[i__1].r, temp.i = x[i__1].i; ix = kx; @@ -20182,7 +9707,7 @@ L20: if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if ((alpha->r != 1.) || (alpha->i != 0.)) { + if (alpha->r != 1. || alpha->i != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * b_dim1; @@ -20196,7 +9721,7 @@ L20: } for (k = *m; k >= 1; --k) { i__2 = k + j * b_dim1; - if ((b[i__2].r != 0.) || (b[i__2].i != 0.)) { + if (b[i__2].r != 0. || b[i__2].i != 0.) 
{ if (nounit) { i__2 = k + j * b_dim1; z_div(&z__1, &b[k + j * b_dim1], &a[k + k * @@ -20225,7 +9750,7 @@ L20: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if ((alpha->r != 1.) || (alpha->i != 0.)) { + if (alpha->r != 1. || alpha->i != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * b_dim1; @@ -20240,7 +9765,7 @@ L20: i__2 = *m; for (k = 1; k <= i__2; ++k) { i__3 = k + j * b_dim1; - if ((b[i__3].r != 0.) || (b[i__3].i != 0.)) { + if (b[i__3].r != 0. || b[i__3].i != 0.) { if (nounit) { i__3 = k + j * b_dim1; z_div(&z__1, &b[k + j * b_dim1], &a[k + k * @@ -20387,7 +9912,7 @@ L20: if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if ((alpha->r != 1.) || (alpha->i != 0.)) { + if (alpha->r != 1. || alpha->i != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * b_dim1; @@ -20402,7 +9927,7 @@ L20: i__2 = j - 1; for (k = 1; k <= i__2; ++k) { i__3 = k + j * a_dim1; - if ((a[i__3].r != 0.) || (a[i__3].i != 0.)) { + if (a[i__3].r != 0. || a[i__3].i != 0.) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__ + j * b_dim1; @@ -20421,7 +9946,7 @@ L20: /* L210: */ } if (nounit) { - z_div(&z__1, &c_b1077, &a[j + j * a_dim1]); + z_div(&z__1, &c_b359, &a[j + j * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { @@ -20438,7 +9963,7 @@ L20: } } else { for (j = *n; j >= 1; --j) { - if ((alpha->r != 1.) || (alpha->i != 0.)) { + if (alpha->r != 1. || alpha->i != 0.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__ + j * b_dim1; @@ -20453,7 +9978,7 @@ L20: i__1 = *n; for (k = j + 1; k <= i__1; ++k) { i__2 = k + j * a_dim1; - if ((a[i__2].r != 0.) || (a[i__2].i != 0.)) { + if (a[i__2].r != 0. || a[i__2].i != 0.) 
{ i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * b_dim1; @@ -20472,7 +9997,7 @@ L20: /* L260: */ } if (nounit) { - z_div(&z__1, &c_b1077, &a[j + j * a_dim1]); + z_div(&z__1, &c_b359, &a[j + j * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { @@ -20499,11 +10024,11 @@ L20: for (k = *n; k >= 1; --k) { if (nounit) { if (noconj) { - z_div(&z__1, &c_b1077, &a[k + k * a_dim1]); + z_div(&z__1, &c_b359, &a[k + k * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; } else { d_cnjg(&z__2, &a[k + k * a_dim1]); - z_div(&z__1, &c_b1077, &z__2); + z_div(&z__1, &c_b359, &z__2); temp.r = z__1.r, temp.i = z__1.i; } i__1 = *m; @@ -20520,7 +10045,7 @@ L20: i__1 = k - 1; for (j = 1; j <= i__1; ++j) { i__2 = j + k * a_dim1; - if ((a[i__2].r != 0.) || (a[i__2].i != 0.)) { + if (a[i__2].r != 0. || a[i__2].i != 0.) { if (noconj) { i__2 = j + k * a_dim1; temp.r = a[i__2].r, temp.i = a[i__2].i; @@ -20544,7 +10069,7 @@ L20: } /* L310: */ } - if ((alpha->r != 1.) || (alpha->i != 0.)) { + if (alpha->r != 1. || alpha->i != 0.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__ + k * b_dim1; @@ -20563,11 +10088,11 @@ L20: for (k = 1; k <= i__1; ++k) { if (nounit) { if (noconj) { - z_div(&z__1, &c_b1077, &a[k + k * a_dim1]); + z_div(&z__1, &c_b359, &a[k + k * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; } else { d_cnjg(&z__2, &a[k + k * a_dim1]); - z_div(&z__1, &c_b1077, &z__2); + z_div(&z__1, &c_b359, &z__2); temp.r = z__1.r, temp.i = z__1.i; } i__2 = *m; @@ -20584,7 +10109,7 @@ L20: i__2 = *n; for (j = k + 1; j <= i__2; ++j) { i__3 = j + k * a_dim1; - if ((a[i__3].r != 0.) || (a[i__3].i != 0.)) { + if (a[i__3].r != 0. || a[i__3].i != 0.) { if (noconj) { i__3 = j + k * a_dim1; temp.r = a[i__3].r, temp.i = a[i__3].i; @@ -20608,7 +10133,7 @@ L20: } /* L360: */ } - if ((alpha->r != 1.) || (alpha->i != 0.)) { + if (alpha->r != 1. || alpha->i != 0.) 
{ i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + k * b_dim1; @@ -20810,7 +10335,7 @@ L20: if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; - if ((x[i__1].r != 0.) || (x[i__1].i != 0.)) { + if (x[i__1].r != 0. || x[i__1].i != 0.) { if (nounit) { i__1 = j; z_div(&z__1, &x[j], &a[j + j * a_dim1]); @@ -20837,7 +10362,7 @@ L20: jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { i__1 = jx; - if ((x[i__1].r != 0.) || (x[i__1].i != 0.)) { + if (x[i__1].r != 0. || x[i__1].i != 0.) { if (nounit) { i__1 = jx; z_div(&z__1, &x[jx], &a[j + j * a_dim1]); @@ -20869,7 +10394,7 @@ L20: i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; - if ((x[i__2].r != 0.) || (x[i__2].i != 0.)) { + if (x[i__2].r != 0. || x[i__2].i != 0.) { if (nounit) { i__2 = j; z_div(&z__1, &x[j], &a[j + j * a_dim1]); @@ -20898,7 +10423,7 @@ L20: i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; - if ((x[i__2].r != 0.) || (x[i__2].i != 0.)) { + if (x[i__2].r != 0. || x[i__2].i != 0.) { if (nounit) { i__2 = jx; z_div(&z__1, &x[jx], &a[j + j * a_dim1]); @@ -21132,3 +10657,4 @@ L20: /* End of ZTRSV . 
*/ } /* ztrsv_ */ + diff --git a/numpy/linalg/lapack_lite/dlapack_lite.c b/numpy/linalg/lapack_lite/dlapack_lite.c index 9f864d7ce..15d6457eb 100644 --- a/numpy/linalg/lapack_lite/dlapack_lite.c +++ b/numpy/linalg/lapack_lite/dlapack_lite.c @@ -20,147 +20,257 @@ extern doublereal dlapy2_(doublereal *x, doublereal *y); /* Table of constant values */ +static integer c__9 = 9; +static integer c__0 = 0; +static doublereal c_b15 = 1.; static integer c__1 = 1; -static complex c_b55 = {0.f,0.f}; -static complex c_b56 = {1.f,0.f}; +static doublereal c_b29 = 0.; +static doublereal c_b94 = -.125; +static doublereal c_b151 = -1.; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; -static integer c__0 = 0; static integer c__8 = 8; static integer c__4 = 4; static integer c__65 = 65; static integer c__6 = 6; -static integer c__9 = 9; -static real c_b320 = 0.f; -static real c_b1011 = 1.f; static integer c__15 = 15; static logical c_false = FALSE_; -static real c_b1290 = -1.f; -static real c_b2206 = .5f; -static doublereal c_b2865 = 1.; -static doublereal c_b2879 = 0.; -static doublereal c_b2944 = -.125; -static doublereal c_b3001 = -1.; static integer c__10 = 10; static integer c__11 = 11; -static doublereal c_b5654 = 2.; +static doublereal c_b2804 = 2.; static logical c_true = TRUE_; -static real c_b9647 = 2.f; +static real c_b3825 = 0.f; +static real c_b3826 = 1.f; -/* Subroutine */ int cgebak_(char *job, char *side, integer *n, integer *ilo, - integer *ihi, real *scale, integer *m, complex *v, integer *ldv, - integer *info) +/* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal * + d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, + integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * + iwork, integer *info) { /* System generated locals */ - integer v_dim1, v_offset, i__1; + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double 
d_sign(doublereal *, doublereal *), log(doublereal); /* Local variables */ - static integer i__, k; - static real s; - static integer ii; + static integer i__, j, k; + static doublereal p, r__; + static integer z__, ic, ii, kk; + static doublereal cs; + static integer is, iu; + static doublereal sn; + static integer nm1; + static doublereal eps; + static integer ivt, difl, difr, ierr, perm, mlvl, sqre; extern logical lsame_(char *, char *); - extern /* Subroutine */ int cswap_(integer *, complex *, integer *, - complex *, integer *); - static logical leftv; - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *), xerbla_(char *, integer *); - static logical rightv; + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer * + , doublereal *, integer *), dswap_(integer *, doublereal *, + integer *, doublereal *, integer *); + static integer poles, iuplo, nsize, start; + extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *); + + extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlaset_(char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), 
dlartg_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *); + static integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + static integer icompq; + static doublereal orgnrm; + static integer givnum, givptr, qstart, smlsiz, wstart, smlszp; /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + December 1, 1999 Purpose ======= - CGEBAK forms the right or left eigenvectors of a complex general - matrix by backward transformation on the computed eigenvectors of the - balanced matrix output by CGEBAL. + DBDSDC computes the singular value decomposition (SVD) of a real + N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, + using a divide and conquer method, where S is a diagonal matrix + with non-negative diagonal elements (the singular values of B), and + U and VT are orthogonal matrices of left and right singular vectors, + respectively. DBDSDC can be used to compute all singular values, + and optionally, singular vectors or singular vectors in compact form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See DLASD3 for details. + + The code currently call DLASDQ if singular values only are desired. + However, it can be slightly modified to compute singular values + using the divide and conquer method. 
Arguments ========= - JOB (input) CHARACTER*1 - Specifies the type of backward transformation required: - = 'N', do nothing, return immediately; - = 'P', do backward transformation for permutation only; - = 'S', do backward transformation for scaling only; - = 'B', do backward transformations for both permutation and - scaling. - JOB must be the same as the argument JOB supplied to CGEBAL. + UPLO (input) CHARACTER*1 + = 'U': B is upper bidiagonal. + = 'L': B is lower bidiagonal. - SIDE (input) CHARACTER*1 - = 'R': V contains right eigenvectors; - = 'L': V contains left eigenvectors. + COMPQ (input) CHARACTER*1 + Specifies whether singular vectors are to be computed + as follows: + = 'N': Compute singular values only; + = 'P': Compute singular values and compute singular + vectors in compact form; + = 'I': Compute singular values and singular vectors. N (input) INTEGER - The number of rows of the matrix V. N >= 0. + The order of the matrix B. N >= 0. - ILO (input) INTEGER - IHI (input) INTEGER - The integers ILO and IHI determined by CGEBAL. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B. - SCALE (input) REAL array, dimension (N) - Details of the permutation and scaling factors, as returned - by CGEBAL. + E (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the elements of E contain the offdiagonal + elements of the bidiagonal matrix whose SVD is desired. + On exit, E has been destroyed. - M (input) INTEGER - The number of columns of the matrix V. M >= 0. + U (output) DOUBLE PRECISION array, dimension (LDU,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, U contains the left singular vectors + of the bidiagonal matrix. + For other values of COMPQ, U is not referenced. 
- V (input/output) COMPLEX array, dimension (LDV,M) - On entry, the matrix of right or left eigenvectors to be - transformed, as returned by CHSEIN or CTREVC. - On exit, V is overwritten by the transformed eigenvectors. + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1. + If singular vectors are desired, then LDU >= max( 1, N ). - LDV (input) INTEGER - The leading dimension of the array V. LDV >= max(1,N). + VT (output) DOUBLE PRECISION array, dimension (LDVT,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, VT' contains the right singular + vectors of the bidiagonal matrix. + For other values of COMPQ, VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1. + If singular vectors are desired, then LDVT >= max( 1, N ). + + Q (output) DOUBLE PRECISION array, dimension (LDQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. + In particular, Q contains all the DOUBLE PRECISION data in + LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, Q is not referenced. + + IQ (output) INTEGER array, dimension (LDIQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. + In particular, IQ contains all INTEGER data in + LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, IQ is not referenced. + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) + If COMPQ = 'N' then LWORK >= (4 * N). 
+ If COMPQ = 'P' then LWORK >= (6 * N). + If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). + + IWORK (workspace) INTEGER array, dimension (8*N) INFO (output) INTEGER - = 0: successful exit + = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an singular value. + The update process of divide and conquer failed. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA ===================================================================== - Decode and Test the input parameters + Test the input parameters. */ /* Parameter adjustments */ - --scale; - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --q; + --iq; + --work; + --iwork; /* Function Body */ - rightv = lsame_(side, "R"); - leftv = lsame_(side, "L"); - *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! lsame_(job, "B")) { + + iuplo = 0; + if (lsame_(uplo, "U")) { + iuplo = 1; + } + if (lsame_(uplo, "L")) { + iuplo = 2; + } + if (lsame_(compq, "N")) { + icompq = 0; + } else if (lsame_(compq, "P")) { + icompq = 1; + } else if (lsame_(compq, "I")) { + icompq = 2; + } else { + icompq = -1; + } + if (iuplo == 0) { *info = -1; - } else if (! rightv && ! 
leftv) { + } else if (icompq < 0) { *info = -2; } else if (*n < 0) { *info = -3; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { - *info = -4; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { - *info = -5; - } else if (*m < 0) { + } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { *info = -7; - } else if (*ldv < max(1,*n)) { + } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { *info = -9; } if (*info != 0) { i__1 = -(*info); - xerbla_("CGEBAK", &i__1); + xerbla_("DBDSDC", &i__1); return 0; } @@ -169,64779 +279,861 @@ static real c_b9647 = 2.f; if (*n == 0) { return 0; } - if (*m == 0) { - return 0; - } - if (lsame_(job, "N")) { + smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n == 1) { + if (icompq == 1) { + q[1] = d_sign(&c_b15, &d__[1]); + q[smlsiz * *n + 1] = 1.; + } else if (icompq == 2) { + u[u_dim1 + 1] = d_sign(&c_b15, &d__[1]); + vt[vt_dim1 + 1] = 1.; + } + d__[1] = abs(d__[1]); return 0; } + nm1 = *n - 1; - if (*ilo == *ihi) { - goto L30; - } - -/* Backward balance */ - - if ((lsame_(job, "S")) || (lsame_(job, "B"))) { +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left +*/ - if (rightv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) { - s = scale[i__]; - csscal_(m, &s, &v[i__ + v_dim1], ldv); -/* L10: */ + wstart = 1; + qstart = 3; + if (icompq == 1) { + dcopy_(n, &d__[1], &c__1, &q[1], &c__1); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); + } + if (iuplo == 2) { + qstart = 5; + wstart = (*n << 1) - 1; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (icompq == 1) { + q[i__ + (*n << 1)] = cs; + q[i__ + *n * 3] = sn; + } else if (icompq == 2) { + work[i__] = cs; + work[nm1 + i__] = -sn; } +/* L10: */ } + } - if (leftv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) 
{ - s = 1.f / scale[i__]; - csscal_(m, &s, &v[i__ + v_dim1], ldv); -/* L20: */ - } - } +/* If ICOMPQ = 0, use DLASDQ to compute the singular values. */ + if (icompq == 0) { + dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + goto L40; } /* - Backward permutation - - For I = ILO-1 step -1 until 1, - IHI+1 step 1 until N do -- + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. */ -L30: - if ((lsame_(job, "P")) || (lsame_(job, "B"))) { - if (rightv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L40; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = scale[i__]; - if (k == i__) { - goto L40; - } - cswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); -L40: - ; - } - } - - if (leftv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L50; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = scale[i__]; - if (k == i__) { - goto L50; - } - cswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); -L50: - ; - } + if (*n <= smlsiz) { + if (icompq == 2) { + dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + } else if (icompq == 1) { + iu = 1; + ivt = iu + *n; + dlaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n); + dlaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n); + dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( + qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ + iu + (qstart - 1) * *n], n, &work[wstart], info); } + goto L40; } - return 0; - -/* End of CGEBAK */ - -} /* cgebak_ */ - -/* Subroutine */ int cgebal_(char *job, integer *n, complex *a, integer 
*lda, - integer *ilo, integer *ihi, real *scale, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1, r__2; - - /* Builtin functions */ - double r_imag(complex *), c_abs(complex *); - - /* Local variables */ - static real c__, f, g; - static integer i__, j, k, l, m; - static real r__, s, ca, ra; - static integer ica, ira, iexc; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cswap_(integer *, complex *, integer *, - complex *, integer *); - static real sfmin1, sfmin2, sfmax1, sfmax2; - extern integer icamax_(integer *, complex *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *), xerbla_(char *, integer *); - static logical noconv; - + if (icompq == 2) { + dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); + dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + } -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 +/* Scale. */ + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + return 0; + } + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, & + ierr); - Purpose - ======= + eps = EPSILON; - CGEBAL balances a general complex matrix A. This involves, first, - permuting A by a similarity transformation to isolate eigenvalues - in the first 1 to ILO-1 and last IHI+1 to N elements on the - diagonal; and second, applying a diagonal similarity transformation - to rows and columns ILO to IHI to make the rows and columns as - close in norm as possible. Both steps are optional. 
+ mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / + log(2.)) + 1; + smlszp = smlsiz + 1; - Balancing may reduce the 1-norm of the matrix, and improve the - accuracy of the computed eigenvalues and/or eigenvectors. + if (icompq == 1) { + iu = 1; + ivt = smlsiz + 1; + difl = ivt + smlszp; + difr = difl + mlvl; + z__ = difr + (mlvl << 1); + ic = z__ + mlvl; + is = ic + 1; + poles = is + 1; + givnum = poles + (mlvl << 1); - Arguments - ========= + k = 1; + givptr = 2; + perm = 3; + givcol = perm + mlvl; + } - JOB (input) CHARACTER*1 - Specifies the operations to be performed on A: - = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 - for i = 1,...,N; - = 'P': permute only; - = 'S': scale only; - = 'B': both permute and scale. + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L20: */ + } - N (input) INTEGER - The order of the matrix A. N >= 0. + start = 1; + sqre = 0; - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the input matrix A. - On exit, A is overwritten by the balanced matrix. - If JOB = 'N', A is not referenced. - See Further Details. + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ - ILO (output) INTEGER - IHI (output) INTEGER - ILO and IHI are set to integers such that on exit - A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. - If JOB = 'N' or 'S', ILO = 1 and IHI = N. + if (i__ < nm1) { - SCALE (output) REAL array, dimension (N) - Details of the permutations and scaling factors applied to - A. 
If P(j) is the index of the row and column interchanged - with row and column j and D(j) is the scaling factor - applied to row and column j, then - SCALE(j) = P(j) for j = 1,...,ILO-1 - = D(j) for j = ILO,...,IHI - = P(j) for j = IHI+1,...,N. - The order in which the interchanges are made is N to IHI+1, - then 1 to ILO-1. +/* A subproblem with E(I) small for I < NM1. */ - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. + nsize = i__ - start + 1; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { - Further Details - =============== +/* A subproblem with E(NM1) not too small but I = NM1. */ - The permutations consist of row and column interchanges which put - the matrix in the form + nsize = *n - start + 1; + } else { - ( T1 X Y ) - P A P = ( 0 B Z ) - ( 0 0 T2 ) +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N). Solve this 1-by-1 problem + first. +*/ - where T1 and T2 are upper triangular matrices whose eigenvalues lie - along the diagonal. The column indices ILO and IHI mark the starting - and ending columns of the submatrix B. Balancing consists of applying - a diagonal similarity transformation inv(D) * B * D to make the - 1-norms of each row of B and its corresponding column nearly equal. 
- The output matrix is + nsize = i__ - start + 1; + if (icompq == 2) { + u[*n + *n * u_dim1] = d_sign(&c_b15, &d__[*n]); + vt[*n + *n * vt_dim1] = 1.; + } else if (icompq == 1) { + q[*n + (qstart - 1) * *n] = d_sign(&c_b15, &d__[*n]); + q[*n + (smlsiz + qstart - 1) * *n] = 1.; + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + } + if (icompq == 2) { + dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + + start * u_dim1], ldu, &vt[start + start * vt_dim1], + ldvt, &smlsiz, &iwork[1], &work[wstart], info); + } else { + dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ + start], &q[start + (iu + qstart - 2) * *n], n, &q[ + start + (ivt + qstart - 2) * *n], &iq[start + k * *n], + &q[start + (difl + qstart - 2) * *n], &q[start + ( + difr + qstart - 2) * *n], &q[start + (z__ + qstart - + 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ + start + givptr * *n], &iq[start + givcol * *n], n, & + iq[start + perm * *n], &q[start + (givnum + qstart - + 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ + start + (is + qstart - 2) * *n], &work[wstart], & + iwork[1], info); + if (*info != 0) { + return 0; + } + } + start = i__ + 1; + } +/* L30: */ + } - ( T1 X*D Y ) - ( 0 inv(D)*B*D inv(D)*Z ). - ( 0 0 T2 ) +/* Unscale */ - Information about the permutations P and the diagonal matrix D is - returned in the vector SCALE. + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr); +L40: - This subroutine is based on the EISPACK routine CBAL. +/* Use Selection Sort to minimize swaps of singular vectors */ - Modified by Tzu-Yi Chen, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --scale; - - /* Function Body */ - *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! 
lsame_(job, "B")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGEBAL", &i__1); - return 0; - } - - k = 1; - l = *n; - - if (*n == 0) { - goto L210; - } - - if (lsame_(job, "N")) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - scale[i__] = 1.f; -/* L10: */ - } - goto L210; - } - - if (lsame_(job, "S")) { - goto L120; - } - -/* Permutation to isolate eigenvalues if possible */ - - goto L50; - -/* Row and column exchange. */ - -L20: - scale[m] = (real) j; - if (j == m) { - goto L30; - } - - cswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); - i__1 = *n - k + 1; - cswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); - -L30: - switch (iexc) { - case 1: goto L40; - case 2: goto L80; - } - -/* Search for rows isolating an eigenvalue and push them down. */ - -L40: - if (l == 1) { - goto L210; - } - --l; - -L50: - for (j = l; j >= 1; --j) { - - i__1 = l; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ == j) { - goto L60; - } - i__2 = j + i__ * a_dim1; - if ((a[i__2].r != 0.f) || (r_imag(&a[j + i__ * a_dim1]) != 0.f)) { - goto L70; + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + kk = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] > p) { + kk = j; + p = d__[j]; } -L60: - ; +/* L50: */ } - - m = l; - iexc = 1; - goto L20; -L70: - ; - } - - goto L90; - -/* Search for columns isolating an eigenvalue and push them left. 
*/ - -L80: - ++k; - -L90: - i__1 = l; - for (j = k; j <= i__1; ++j) { - - i__2 = l; - for (i__ = k; i__ <= i__2; ++i__) { - if (i__ == j) { - goto L100; - } - i__3 = i__ + j * a_dim1; - if ((a[i__3].r != 0.f) || (r_imag(&a[i__ + j * a_dim1]) != 0.f)) { - goto L110; + if (kk != i__) { + d__[kk] = d__[i__]; + d__[i__] = p; + if (icompq == 1) { + iq[i__] = kk; + } else if (icompq == 2) { + dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & + c__1); + dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); } -L100: - ; + } else if (icompq == 1) { + iq[i__] = i__; } - - m = k; - iexc = 2; - goto L20; -L110: - ; +/* L60: */ } -L120: - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - scale[i__] = 1.f; -/* L130: */ - } +/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ - if (lsame_(job, "P")) { - goto L210; + if (icompq == 1) { + if (iuplo == 1) { + iq[*n] = 1; + } else { + iq[*n] = 0; + } } /* - Balance the submatrix in rows K to L. - - Iterative loop for norm reduction + If B is lower bidiagonal, update U by those Givens rotations + which rotated B to be upper bidiagonal */ - sfmin1 = slamch_("S") / slamch_("P"); - sfmax1 = 1.f / sfmin1; - sfmin2 = sfmin1 * 8.f; - sfmax2 = 1.f / sfmin2; -L140: - noconv = FALSE_; - - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - c__ = 0.f; - r__ = 0.f; - - i__2 = l; - for (j = k; j <= i__2; ++j) { - if (j == i__) { - goto L150; - } - i__3 = j + i__ * a_dim1; - c__ += (r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(&a[j + i__ - * a_dim1]), dabs(r__2)); - i__3 = i__ + j * a_dim1; - r__ += (r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(&a[i__ + j - * a_dim1]), dabs(r__2)); -L150: - ; - } - ica = icamax_(&l, &a[i__ * a_dim1 + 1], &c__1); - ca = c_abs(&a[ica + i__ * a_dim1]); - i__2 = *n - k + 1; - ira = icamax_(&i__2, &a[i__ + k * a_dim1], lda); - ra = c_abs(&a[i__ + (ira + k - 1) * a_dim1]); - -/* Guard against zero C or R due to underflow. 
*/ - - if ((c__ == 0.f) || (r__ == 0.f)) { - goto L200; - } - g = r__ / 8.f; - f = 1.f; - s = c__ + r__; -L160: -/* Computing MAX */ - r__1 = max(f,c__); -/* Computing MIN */ - r__2 = min(r__,g); - if (((c__ >= g) || (dmax(r__1,ca) >= sfmax2)) || (dmin(r__2,ra) <= - sfmin2)) { - goto L170; - } - f *= 8.f; - c__ *= 8.f; - ca *= 8.f; - r__ /= 8.f; - g /= 8.f; - ra /= 8.f; - goto L160; - -L170: - g = c__ / 8.f; -L180: -/* Computing MIN */ - r__1 = min(f,c__), r__1 = min(r__1,g); - if (((g < r__) || (dmax(r__,ra) >= sfmax2)) || (dmin(r__1,ca) <= - sfmin2)) { - goto L190; - } - f /= 8.f; - c__ /= 8.f; - g /= 8.f; - ca /= 8.f; - r__ *= 8.f; - ra *= 8.f; - goto L180; - -/* Now balance. */ - -L190: - if (c__ + r__ >= s * .95f) { - goto L200; - } - if (f < 1.f && scale[i__] < 1.f) { - if (f * scale[i__] <= sfmin1) { - goto L200; - } - } - if (f > 1.f && scale[i__] > 1.f) { - if (scale[i__] >= sfmax1 / f) { - goto L200; - } - } - g = 1.f / f; - scale[i__] *= f; - noconv = TRUE_; - - i__2 = *n - k + 1; - csscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); - csscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); - -L200: - ; - } - - if (noconv) { - goto L140; + if (iuplo == 2 && icompq == 2) { + dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); } -L210: - *ilo = k; - *ihi = l; - return 0; -/* End of CGEBAL */ +/* End of DBDSDC */ -} /* cgebal_ */ +} /* dbdsdc_ */ -/* Subroutine */ int cgebd2_(integer *m, integer *n, complex *a, integer *lda, - real *d__, real *e, complex *tauq, complex *taup, complex *work, - integer *info) +/* Subroutine */ int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, + integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * + ldc, doublereal *work, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - complex q__1; + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + doublereal 
d__1, d__2, d__3, d__4; /* Builtin functions */ - void r_cnjg(complex *, complex *); + double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign( + doublereal *, doublereal *); /* Local variables */ - static integer i__; - static complex alpha; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *), - clarfg_(integer *, complex *, complex *, integer *, complex *), - clacgv_(integer *, complex *, integer *), xerbla_(char *, integer - *); + static doublereal f, g, h__; + static integer i__, j, m; + static doublereal r__, cs; + static integer ll; + static doublereal sn, mu; + static integer nm1, nm12, nm13, lll; + static doublereal eps, sll, tol, abse; + static integer idir; + static doublereal abss; + static integer oldm; + static doublereal cosl; + static integer isub, iter; + static doublereal unfl, sinl, cosr, smin, smax, sinr; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), dlas2_( + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + static doublereal oldcs; + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *); + static integer oldll; + static doublereal shift, sigmn, oldsn; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer maxit; + static doublereal sminl, sigmx; + static logical lower; + extern /* Subroutine */ int dlasq1_(integer *, doublereal *, doublereal *, + doublereal *, integer *), dlasv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, 
doublereal *), xerbla_(char *, + integer *); + static doublereal sminoa, thresh; + static logical rotate; + static doublereal sminlo, tolmul; /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + October 31, 1999 Purpose ======= - CGEBD2 reduces a complex general m by n matrix A to upper or lower - real bidiagonal form B by a unitary transformation: Q' * A * P = B. + DBDSQR computes the singular value decomposition (SVD) of a real + N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' + denotes the transpose of P), where S is a diagonal matrix with + non-negative diagonal elements (the singular values of B), and Q + and P are orthogonal matrices. + + The routine computes S, and optionally computes U * Q, P' * VT, + or Q' * C, for given real input matrices U, VT, and C. - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + See "Computing Small Singular Values of Bidiagonal Matrices With + Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + no. 5, pp. 873-912, Sept 1990) and + "Accurate singular values and differential qd algorithms," by + B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics + Department, University of California at Berkeley, July 1992 + for a detailed description of the algorithm. Arguments ========= - M (input) INTEGER - The number of rows in the matrix A. M >= 0. + UPLO (input) CHARACTER*1 + = 'U': B is upper bidiagonal; + = 'L': B is lower bidiagonal. N (input) INTEGER - The number of columns in the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. 
- On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the unitary matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the unitary matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) REAL array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) REAL array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - - TAUQ (output) COMPLEX array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the unitary matrix Q. See Further Details. - - TAUP (output) COMPLEX array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the unitary matrix P. See Further Details. - - WORK (workspace) COMPLEX array, dimension (max(M,N)) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. + The order of the matrix B. N >= 0. - Further Details - =============== + NCVT (input) INTEGER + The number of columns of the matrix VT. NCVT >= 0. - The matrices Q and P are represented as products of elementary - reflectors: + NRU (input) INTEGER + The number of rows of the matrix U. NRU >= 0. 
- If m >= n, + NCC (input) INTEGER + The number of columns of the matrix C. NCC >= 0. - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B in decreasing + order. - Each H(i) and G(i) has the form: + E (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the elements of E contain the + offdiagonal elements of the bidiagonal matrix whose SVD + is desired. On normal exit (INFO = 0), E is destroyed. + If the algorithm does not converge (INFO > 0), D and E + will contain the diagonal and superdiagonal elements of a + bidiagonal matrix orthogonally equivalent to the one given + as input. E(N) is used for workspace. - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) + On entry, an N-by-NCVT matrix VT. + On exit, VT is overwritten by P' * VT. + VT is not referenced if NCVT = 0. - where tauq and taup are complex scalars, and v and u are complex - vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in - A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in - A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + LDVT (input) INTEGER + The leading dimension of the array VT. + LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. - If m < n, + U (input/output) DOUBLE PRECISION array, dimension (LDU, N) + On entry, an NRU-by-N matrix U. + On exit, U is overwritten by U * Q. + U is not referenced if NRU = 0. - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + LDU (input) INTEGER + The leading dimension of the array U. LDU >= max(1,NRU). - Each H(i) and G(i) has the form: + C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) + On entry, an N-by-NCC matrix C. + On exit, C is overwritten by Q' * C. + C is not referenced if NCC = 0. 
- H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + LDC (input) INTEGER + The leading dimension of the array C. + LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. - where tauq and taup are complex scalars, v and u are complex vectors; - v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); - u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). + WORK (workspace) DOUBLE PRECISION array, dimension (4*N) - The contents of A on exit are illustrated by the following examples: + INFO (output) INTEGER + = 0: successful exit + < 0: If INFO = -i, the i-th argument had an illegal value + > 0: the algorithm did not converge; D and E contain the + elements of a bidiagonal matrix which is orthogonally + similar to the input matrix B; if INFO = i, i + elements of E have not converged to zero. - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + Internal Parameters + =================== - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) + TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) + TOLMUL controls the convergence criterion of the QR loop. + If it is positive, TOLMUL*EPS is the desired relative + precision in the computed singular values. + If it is negative, abs(TOLMUL*EPS*sigma_max) is the + desired absolute accuracy in the computed singular + values (corresponds to relative accuracy + abs(TOLMUL*EPS) in the largest singular value. + abs(TOLMUL) should be between 1 and 1/EPS, and preferably + between 10 (for fast convergence) and .1/EPS + (for there to be some accuracy in the results). + Default is to lose at either one eighth or 2 of the + available decimal digits in each computed singular value + (whichever is smaller). 
- where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). + MAXITR INTEGER, default = 6 + MAXITR controls the maximum number of passes of the + algorithm through its inner loop. The algorithms stops + (and so fails to converge) if the number of passes + through the inner loop exceeds MAXITR*N**2. ===================================================================== - Test the input parameters + Test the input parameters. */ /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; --d__; --e; - --tauq; - --taup; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; --work; /* Function Body */ *info = 0; - if (*m < 0) { + lower = lsame_(uplo, "L"); + if (! lsame_(uplo, "U") && ! lower) { *info = -1; } else if (*n < 0) { *info = -2; - } else if (*lda < max(1,*m)) { + } else if (*ncvt < 0) { + *info = -3; + } else if (*nru < 0) { *info = -4; + } else if (*ncc < 0) { + *info = -5; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -9; + } else if (*ldu < max(1,*nru)) { + *info = -11; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -13; } - if (*info < 0) { + if (*info != 0) { i__1 = -(*info); - xerbla_("CGEBD2", &i__1); + xerbla_("DBDSQR", &i__1); return 0; } + if (*n == 0) { + return 0; + } + if (*n == 1) { + goto L160; + } - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ +/* ROTATE is true if any singular vectors desired, false otherwise */ - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ +/* If no singular vectors desired, use qd algorithm */ - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - 
i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & - tauq[i__]); - i__2 = i__; - d__[i__2] = alpha.r; - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; + if (! rotate) { + dlasq1_(n, &d__[1], &e[1], &work[1], info); + return 0; + } -/* Apply H(i)' to A(i:m,i+1:n) from the left */ + nm1 = *n - 1; + nm12 = nm1 + nm1; + nm13 = nm12 + nm1; + idir = 0; - i__2 = *m - i__ + 1; - i__3 = *n - i__; - r_cnjg(&q__1, &tauq[i__]); - clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &q__1, - &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - i__2 = i__ + i__ * a_dim1; - i__3 = i__; - a[i__2].r = d__[i__3], a[i__2].i = 0.f; +/* Get machine constants */ - if (i__ < *n) { + eps = EPSILON; + unfl = SAFEMINIMUM; /* - Generate elementary reflector G(i) to annihilate - A(i,i+2:n) + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left */ - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ + (i__ + 1) * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & - taup[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + (i__ + 1) * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Apply G(i) to A(i+1:m,i+1:n) from the right */ - - i__2 = *m - i__; - i__3 = *n - i__; - clarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], - lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &work[1]); - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ + (i__ + 1) * a_dim1; - i__3 = i__; - a[i__2].r = e[i__3], a[i__2].i = 0.f; - } else { - i__2 = i__; - taup[i__2].r = 0.f, taup[i__2].i = 0.f; - } + if (lower) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ 
+ 1] = cs * d__[i__ + 1]; + work[i__] = cs; + work[nm1 + i__] = sn; /* L10: */ } - } else { -/* Reduce to lower bidiagonal form */ - - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & - taup[i__]); - i__2 = i__; - d__[i__2] = alpha.r; - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Apply G(i) to A(i+1:m,i:n) from the right */ - - i__2 = *m - i__; - i__3 = *n - i__ + 1; -/* Computing MIN */ - i__4 = i__ + 1; - clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ - i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - i__3 = i__; - a[i__2].r = d__[i__3], a[i__2].i = 0.f; +/* Update singular vectors if desired */ - if (i__ < *m) { + if (*nru > 0) { + dlasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], + ldu); + } + if (*ncc > 0) { + dlasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], + ldc); + } + } /* - Generate elementary reflector H(i) to annihilate - A(i+2:m,i) -*/ + Compute singular values to relative accuracy TOL + (By setting TOL to be negative, algorithm will compute + singular values to absolute accuracy ABS(TOL)*norm(input matrix)) - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, - &tauq[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; + Computing MAX + Computing MIN +*/ + d__3 = 100., d__4 = pow_dd(&eps, &c_b94); + d__1 = 10., d__2 = min(d__3,d__4); + 
tolmul = max(d__1,d__2); + tol = tolmul * eps; -/* Apply H(i)' to A(i+1:m,i+1:n) from the left */ +/* Compute approximate maximum, minimum singular values */ - i__2 = *m - i__; - i__3 = *n - i__; - r_cnjg(&q__1, &tauq[i__]); - clarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & - c__1, &q__1, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, & - work[1]); - i__2 = i__ + 1 + i__ * a_dim1; - i__3 = i__; - a[i__2].r = e[i__3], a[i__2].i = 0.f; - } else { - i__2 = i__; - tauq[i__2].r = 0.f, tauq[i__2].i = 0.f; - } + smax = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1)); + smax = max(d__2,d__3); /* L20: */ - } } - return 0; - -/* End of CGEBD2 */ - -} /* cgebd2_ */ - -/* Subroutine */ int cgebrd_(integer *m, integer *n, complex *a, integer *lda, - real *d__, real *e, complex *tauq, complex *taup, complex *work, - integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - real r__1; - complex q__1; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); + smax = max(d__2,d__3); +/* L30: */ + } + sminl = 0.; + if (tol >= 0.) 
{ - /* Local variables */ - static integer i__, j, nb, nx; - static real ws; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *); - static integer nbmin, iinfo, minmn; - extern /* Subroutine */ int cgebd2_(integer *, integer *, complex *, - integer *, real *, real *, complex *, complex *, complex *, - integer *), clabrd_(integer *, integer *, integer *, complex *, - integer *, real *, real *, complex *, complex *, complex *, - integer *, complex *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwrkx, ldwrky, lwkopt; - static logical lquery; +/* Relative accuracy desired */ + sminoa = abs(d__[1]); + if (sminoa == 0.) { + goto L50; + } + mu = sminoa; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] + , abs(d__1)))); + sminoa = min(sminoa,mu); + if (sminoa == 0.) { + goto L50; + } +/* L40: */ + } +L50: + sminoa /= sqrt((doublereal) (*n)); +/* Computing MAX */ + d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl; + thresh = max(d__1,d__2); + } else { /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CGEBRD reduces a general complex M-by-N matrix A to upper or lower - bidiagonal form B by a unitary transformation: Q**H * A * P = B. - - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. M >= 0. - - N (input) INTEGER - The number of columns in the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the M-by-N general matrix to be reduced. 
- On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the unitary matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the unitary matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) REAL array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) REAL array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + Absolute accuracy desired - TAUQ (output) COMPLEX array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the unitary matrix Q. See Further Details. + Computing MAX +*/ + d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl; + thresh = max(d__1,d__2); + } - TAUP (output) COMPLEX array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the unitary matrix P. See Further Details. +/* + Prepare for main iteration loop for the singular values + (MAXIT is the maximum number of passes through the inner + loop permitted before nonconvergence signalled.) +*/ - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
+ maxit = *n * 6 * *n; + iter = 0; + oldll = -1; + oldm = -1; - LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,M,N). - For optimum performance LWORK >= (M+N)*NB, where NB - is the optimal blocksize. +/* M points to last element of unconverged part of matrix */ - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. + m = *n; - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. +/* Begin main iteration loop */ - Further Details - =============== +L60: - The matrices Q and P are represented as products of elementary - reflectors: +/* Check for convergence or exceeding iteration count */ - If m >= n, + if (m <= 1) { + goto L160; + } + if (iter > maxit) { + goto L200; + } - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +/* Find diagonal block of matrix to work on */ - Each H(i) and G(i) has the form: + if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) { + d__[m] = 0.; + } + smax = (d__1 = d__[m], abs(d__1)); + smin = smax; + i__1 = m - 1; + for (lll = 1; lll <= i__1; ++lll) { + ll = m - lll; + abss = (d__1 = d__[ll], abs(d__1)); + abse = (d__1 = e[ll], abs(d__1)); + if (tol < 0. && abss <= thresh) { + d__[ll] = 0.; + } + if (abse <= thresh) { + goto L80; + } + smin = min(smin,abss); +/* Computing MAX */ + d__1 = max(smax,abss); + smax = max(d__1,abse); +/* L70: */ + } + ll = 0; + goto L90; +L80: + e[ll] = 0.; - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' +/* Matrix splits since E(LL) = 0 */ - where tauq and taup are complex scalars, and v and u are complex - vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in - A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in - A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). 
+ if (ll == m - 1) { - If m < n, +/* Convergence of bottom singular value, return to top of loop */ - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + --m; + goto L60; + } +L90: + ++ll; - Each H(i) and G(i) has the form: +/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + if (ll == m - 1) { - where tauq and taup are complex scalars, and v and u are complex - vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in - A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +/* 2 by 2 block, handle separately */ - The contents of A on exit are illustrated by the following examples: + dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, + &sinl, &cosl); + d__[m - 1] = sigmx; + e[m - 1] = 0.; + d__[m] = sigmn; - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +/* Compute singular vectors, if desired */ - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) + if (*ncvt > 0) { + drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & + cosr, &sinr); + } + if (*nru > 0) { + drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & + c__1, &cosl, &sinl); + } + if (*ncc > 0) { + drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & + cosl, &sinl); + } + m += -2; + goto L60; + } - where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). 
+/* + If working on new submatrix, choose shift direction + (from larger end diagonal element towards smaller) +*/ - ===================================================================== + if (ll > oldm || m < oldll) { + if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) { +/* Chase bulge from top (big end) to bottom (small end) */ - Test the input parameters -*/ + idir = 1; + } else { - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - --work; +/* Chase bulge from bottom (big end) to top (small end) */ - /* Function Body */ - *info = 0; -/* Computing MAX */ - i__1 = 1, i__2 = ilaenv_(&c__1, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = max(i__1,i__2); - lwkopt = (*m + *n) * nb; - r__1 = (real) lwkopt; - work[1].r = r__1, work[1].i = 0.f; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = max(1,*m); - if (*lwork < max(i__1,*n) && ! lquery) { - *info = -10; + idir = 2; } } - if (*info < 0) { - i__1 = -(*info); - xerbla_("CGEBRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - minmn = min(*m,*n); - if (minmn == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - ws = (real) max(*m,*n); - ldwrkx = *m; - ldwrky = *n; +/* Apply convergence tests */ - if (nb > 1 && nb < minmn) { + if (idir == 1) { /* - Set the crossover point NX. - - Computing MAX + Run convergence test in forward direction + First apply standard test to bottom of matrix */ - i__1 = nb, i__2 = ilaenv_(&c__3, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); -/* Determine when to switch from blocked to unblocked code. */ + if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs( + d__1)) || tol < 0. 
&& (d__3 = e[m - 1], abs(d__3)) <= thresh) + { + e[m - 1] = 0.; + goto L60; + } - if (nx < minmn) { - ws = (real) ((*m + *n) * nb); - if ((real) (*lwork) < ws) { + if (tol >= 0.) { /* - Not enough work space for the optimal NB, consider using - a smaller block size. + If relative accuracy desired, + apply convergence criterion forward */ - nbmin = ilaenv_(&c__2, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - if (*lwork >= (*m + *n) * nbmin) { - nb = *lwork / (*m + *n); - } else { - nb = 1; - nx = minmn; + mu = (d__1 = d__[ll], abs(d__1)); + sminl = mu; + i__1 = m - 1; + for (lll = ll; lll <= i__1; ++lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; } + sminlo = sminl; + mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ + lll], abs(d__1)))); + sminl = min(sminl,mu); +/* L100: */ } } - } else { - nx = minmn; - } - i__1 = minmn - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + } else { /* - Reduce rows and columns i:i+ib-1 to bidiagonal form and return - the matrices X and Y which are needed to update the unreduced - part of the matrix + Run convergence test in backward direction + First apply standard test to top of matrix */ - i__3 = *m - i__ + 1; - i__4 = *n - i__ + 1; - clabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ - i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx - * nb + 1], &ldwrky); + if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1) + ) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) { + e[ll] = 0.; + goto L60; + } + + if (tol >= 0.) 
{ /* - Update the trailing submatrix A(i+ib:m,i+ib:n), using - an update of the form A := A - V*Y' - X*U' + If relative accuracy desired, + apply convergence criterion backward */ - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", &i__3, &i__4, &nb, & - q__1, &a[i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + - nb + 1], &ldwrky, &c_b56, &a[i__ + nb + (i__ + nb) * a_dim1], - lda); - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &q__1, & - work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & - c_b56, &a[i__ + nb + (i__ + nb) * a_dim1], lda); - -/* Copy diagonal and off-diagonal elements of B back into A */ - - if (*m >= *n) { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - i__4 = j + j * a_dim1; - i__5 = j; - a[i__4].r = d__[i__5], a[i__4].i = 0.f; - i__4 = j + (j + 1) * a_dim1; - i__5 = j; - a[i__4].r = e[i__5], a[i__4].i = 0.f; -/* L10: */ - } - } else { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - i__4 = j + j * a_dim1; - i__5 = j; - a[i__4].r = d__[i__5], a[i__4].i = 0.f; - i__4 = j + 1 + j * a_dim1; - i__5 = j; - a[i__4].r = e[i__5], a[i__4].i = 0.f; -/* L20: */ + mu = (d__1 = d__[m], abs(d__1)); + sminl = mu; + i__1 = ll; + for (lll = m - 1; lll >= i__1; --lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; + } + sminlo = sminl; + mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] + , abs(d__1)))); + sminl = min(sminl,mu); +/* L110: */ } } -/* L30: */ } + oldll = ll; + oldm = m; -/* Use unblocked code to reduce the remainder of the matrix */ +/* + Compute shift. First, test if shifting would ruin relative + accuracy, and if so set the shift to zero. 
- i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - cgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & - tauq[i__], &taup[i__], &work[1], &iinfo); - work[1].r = ws, work[1].i = 0.f; - return 0; + Computing MAX +*/ + d__1 = eps, d__2 = tol * .01; + if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1,d__2)) { -/* End of CGEBRD */ +/* Use a zero shift to avoid loss of relative accuracy */ -} /* cgebrd_ */ + shift = 0.; + } else { -/* Subroutine */ int cgeev_(char *jobvl, char *jobvr, integer *n, complex *a, - integer *lda, complex *w, complex *vl, integer *ldvl, complex *vr, - integer *ldvr, complex *work, integer *lwork, real *rwork, integer * - info) -{ - /* System generated locals */ - integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4; - real r__1, r__2; - complex q__1, q__2; +/* Compute the shift from 2-by-2 block at end of matrix */ - /* Builtin functions */ - double sqrt(doublereal), r_imag(complex *); - void r_cnjg(complex *, complex *); + if (idir == 1) { + sll = (d__1 = d__[ll], abs(d__1)); + dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); + } else { + sll = (d__1 = d__[m], abs(d__1)); + dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); + } - /* Local variables */ - static integer i__, k, ihi; - static real scl; - static integer ilo; - static real dum[1], eps; - static complex tmp; - static integer ibal; - static char side[1]; - static integer maxb; - static real anrm; - static integer ierr, itau, iwrk, nout; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *); - extern logical lsame_(char *, char *); - extern doublereal scnrm2_(integer *, complex *, integer *); - extern /* Subroutine */ int cgebak_(char *, char *, integer *, integer *, - integer *, real *, integer *, complex *, integer *, integer *), cgebal_(char *, integer *, complex *, integer *, - integer *, integer *, real *, integer *), slabad_(real *, - real *); - static logical scalea; - extern 
doublereal clange_(char *, integer *, integer *, complex *, - integer *, real *); - static real cscale; - extern /* Subroutine */ int cgehrd_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *, integer *), - clascl_(char *, integer *, integer *, real *, real *, integer *, - integer *, complex *, integer *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *), clacpy_(char *, integer *, integer *, complex *, integer *, - complex *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical select[1]; - static real bignum; - extern integer isamax_(integer *, real *, integer *); - extern /* Subroutine */ int chseqr_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *), ctrevc_(char *, - char *, logical *, integer *, complex *, integer *, complex *, - integer *, complex *, integer *, integer *, integer *, complex *, - real *, integer *), cunghr_(integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - integer *); - static integer minwrk, maxwrk; - static logical wantvl; - static real smlnum; - static integer hswork, irwork; - static logical lquery, wantvr; +/* Test if shift negligible, and if so set to zero */ + if (sll > 0.) { +/* Computing 2nd power */ + d__1 = shift / sll; + if (d__1 * d__1 < eps) { + shift = 0.; + } + } + } -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 +/* Increment iteration count */ + iter = iter + m - ll; - Purpose - ======= +/* If SHIFT = 0, do simplified QR iteration */ - CGEEV computes for an N-by-N complex nonsymmetric matrix A, the - eigenvalues and, optionally, the left and/or right eigenvectors. - - The right eigenvector v(j) of A satisfies - A * v(j) = lambda(j) * v(j) - where lambda(j) is its eigenvalue. - The left eigenvector u(j) of A satisfies - u(j)**H * A = lambda(j) * u(j)**H - where u(j)**H denotes the conjugate transpose of u(j). - - The computed eigenvectors are normalized to have Euclidean norm - equal to 1 and largest component real. - - Arguments - ========= - - JOBVL (input) CHARACTER*1 - = 'N': left eigenvectors of A are not computed; - = 'V': left eigenvectors of are computed. - - JOBVR (input) CHARACTER*1 - = 'N': right eigenvectors of A are not computed; - = 'V': right eigenvectors of A are computed. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the N-by-N matrix A. - On exit, A has been overwritten. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - W (output) COMPLEX array, dimension (N) - W contains the computed eigenvalues. - - VL (output) COMPLEX array, dimension (LDVL,N) - If JOBVL = 'V', the left eigenvectors u(j) are stored one - after another in the columns of VL, in the same order - as their eigenvalues. - If JOBVL = 'N', VL is not referenced. - u(j) = VL(:,j), the j-th column of VL. - - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= 1; if - JOBVL = 'V', LDVL >= N. - - VR (output) COMPLEX array, dimension (LDVR,N) - If JOBVR = 'V', the right eigenvectors v(j) are stored one - after another in the columns of VR, in the same order - as their eigenvalues. - If JOBVR = 'N', VR is not referenced. 
- v(j) = VR(:,j), the j-th column of VR. - - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= 1; if - JOBVR = 'V', LDVR >= N. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,2*N). - For good performance, LWORK must generally be larger. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - RWORK (workspace) REAL array, dimension (2*N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = i, the QR algorithm failed to compute all the - eigenvalues, and no eigenvectors have been computed; - elements and i+1:N of W contain eigenvalues which have - converged. - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --w; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - --rwork; - - /* Function Body */ - *info = 0; - lquery = *lwork == -1; - wantvl = lsame_(jobvl, "V"); - wantvr = lsame_(jobvr, "V"); - if (! wantvl && ! lsame_(jobvl, "N")) { - *info = -1; - } else if (! wantvr && ! 
lsame_(jobvr, "N")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if ((*ldvl < 1) || (wantvl && *ldvl < *n)) { - *info = -8; - } else if ((*ldvr < 1) || (wantvr && *ldvr < *n)) { - *info = -10; - } - -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - CWorkspace refers to complex workspace, and RWorkspace to real - workspace. NB refers to the optimal block size for the - immediately following subroutine, as returned by ILAENV. - HSWORK refers to the workspace preferred by CHSEQR, as - calculated below. HSWORK is computed assuming ILO=1 and IHI=N, - the worst case.) -*/ - - minwrk = 1; - if (*info == 0 && ((*lwork >= 1) || (lquery))) { - maxwrk = *n + *n * ilaenv_(&c__1, "CGEHRD", " ", n, &c__1, n, &c__0, ( - ftnlen)6, (ftnlen)1); - if (! wantvl && ! wantvr) { -/* Computing MAX */ - i__1 = 1, i__2 = (*n) << (1); - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "CHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "CHSEQR", "EN", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = (*n) << (1); - hswork = max(i__1,i__2); - maxwrk = max(maxwrk,hswork); - } else { -/* Computing MAX */ - i__1 = 1, i__2 = (*n) << (1); - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "CUNGHR", - " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "CHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "CHSEQR", "SV", 
n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = (*n) << (1); - hswork = max(i__1,i__2); -/* Computing MAX */ - i__1 = max(maxwrk,hswork), i__2 = (*n) << (1); - maxwrk = max(i__1,i__2); - } - work[1].r = (real) maxwrk, work[1].i = 0.f; - } - if (*lwork < minwrk && ! lquery) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGEEV ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Get machine constants */ - - eps = slamch_("P"); - smlnum = slamch_("S"); - bignum = 1.f / smlnum; - slabad_(&smlnum, &bignum); - smlnum = sqrt(smlnum) / eps; - bignum = 1.f / smlnum; - -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ - - anrm = clange_("M", n, n, &a[a_offset], lda, dum); - scalea = FALSE_; - if (anrm > 0.f && anrm < smlnum) { - scalea = TRUE_; - cscale = smlnum; - } else if (anrm > bignum) { - scalea = TRUE_; - cscale = bignum; - } - if (scalea) { - clascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & - ierr); - } - -/* - Balance the matrix - (CWorkspace: none) - (RWorkspace: need N) -*/ - - ibal = 1; - cgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &rwork[ibal], &ierr); - -/* - Reduce to upper Hessenberg form - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: none) -*/ - - itau = 1; - iwrk = itau + *n; - i__1 = *lwork - iwrk + 1; - cgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, - &ierr); - - if (wantvl) { - -/* - Want left eigenvectors - Copy Householder vectors to VL -*/ - - *(unsigned char *)side = 'L'; - clacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) - ; - -/* - Generate unitary matrix in VL - (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) - (RWorkspace: none) -*/ - - i__1 = *lwork - iwrk + 1; - cunghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], - &i__1, &ierr); 
- -/* - Perform QR iteration, accumulating Schur vectors in VL - (CWorkspace: need 1, prefer HSWORK (see comments) ) - (RWorkspace: none) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - chseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vl[ - vl_offset], ldvl, &work[iwrk], &i__1, info); - - if (wantvr) { - -/* - Want left and right eigenvectors - Copy Schur vectors to VR -*/ - - *(unsigned char *)side = 'B'; - clacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); - } - - } else if (wantvr) { - -/* - Want right eigenvectors - Copy Householder vectors to VR -*/ - - *(unsigned char *)side = 'R'; - clacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) - ; - -/* - Generate unitary matrix in VR - (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) - (RWorkspace: none) -*/ - - i__1 = *lwork - iwrk + 1; - cunghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], - &i__1, &ierr); - -/* - Perform QR iteration, accumulating Schur vectors in VR - (CWorkspace: need 1, prefer HSWORK (see comments) ) - (RWorkspace: none) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - chseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ - vr_offset], ldvr, &work[iwrk], &i__1, info); - - } else { - -/* - Compute eigenvalues only - (CWorkspace: need 1, prefer HSWORK (see comments) ) - (RWorkspace: none) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - chseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ - vr_offset], ldvr, &work[iwrk], &i__1, info); - } - -/* If INFO > 0 from CHSEQR, then quit */ - - if (*info > 0) { - goto L50; - } - - if ((wantvl) || (wantvr)) { - -/* - Compute left and/or right eigenvectors - (CWorkspace: need 2*N) - (RWorkspace: need 2*N) -*/ - - irwork = ibal + *n; - ctrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, - &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &rwork[irwork], - &ierr); - } - - if (wantvl) { - -/* - Undo balancing of left eigenvectors - (CWorkspace: none) - (RWorkspace: need N) 
-*/ - - cgebak_("B", "L", n, &ilo, &ihi, &rwork[ibal], n, &vl[vl_offset], - ldvl, &ierr); - -/* Normalize left eigenvectors and make largest component real */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - scl = 1.f / scnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); - csscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { - i__3 = k + i__ * vl_dim1; -/* Computing 2nd power */ - r__1 = vl[i__3].r; -/* Computing 2nd power */ - r__2 = r_imag(&vl[k + i__ * vl_dim1]); - rwork[irwork + k - 1] = r__1 * r__1 + r__2 * r__2; -/* L10: */ - } - k = isamax_(n, &rwork[irwork], &c__1); - r_cnjg(&q__2, &vl[k + i__ * vl_dim1]); - r__1 = sqrt(rwork[irwork + k - 1]); - q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1; - tmp.r = q__1.r, tmp.i = q__1.i; - cscal_(n, &tmp, &vl[i__ * vl_dim1 + 1], &c__1); - i__2 = k + i__ * vl_dim1; - i__3 = k + i__ * vl_dim1; - r__1 = vl[i__3].r; - q__1.r = r__1, q__1.i = 0.f; - vl[i__2].r = q__1.r, vl[i__2].i = q__1.i; -/* L20: */ - } - } - - if (wantvr) { - -/* - Undo balancing of right eigenvectors - (CWorkspace: none) - (RWorkspace: need N) -*/ - - cgebak_("B", "R", n, &ilo, &ihi, &rwork[ibal], n, &vr[vr_offset], - ldvr, &ierr); - -/* Normalize right eigenvectors and make largest component real */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - scl = 1.f / scnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); - csscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { - i__3 = k + i__ * vr_dim1; -/* Computing 2nd power */ - r__1 = vr[i__3].r; -/* Computing 2nd power */ - r__2 = r_imag(&vr[k + i__ * vr_dim1]); - rwork[irwork + k - 1] = r__1 * r__1 + r__2 * r__2; -/* L30: */ - } - k = isamax_(n, &rwork[irwork], &c__1); - r_cnjg(&q__2, &vr[k + i__ * vr_dim1]); - r__1 = sqrt(rwork[irwork + k - 1]); - q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1; - tmp.r = q__1.r, tmp.i = q__1.i; - cscal_(n, &tmp, &vr[i__ * vr_dim1 + 1], &c__1); - i__2 = k + i__ * vr_dim1; - i__3 = k + i__ * vr_dim1; 
- r__1 = vr[i__3].r; - q__1.r = r__1, q__1.i = 0.f; - vr[i__2].r = q__1.r, vr[i__2].i = q__1.i; -/* L40: */ - } - } - -/* Undo scaling if necessary */ - -L50: - if (scalea) { - i__1 = *n - *info; -/* Computing MAX */ - i__3 = *n - *info; - i__2 = max(i__3,1); - clascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[*info + 1] - , &i__2, &ierr); - if (*info > 0) { - i__1 = ilo - 1; - clascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[1], n, - &ierr); - } - } - - work[1].r = (real) maxwrk, work[1].i = 0.f; - return 0; - -/* End of CGEEV */ - -} /* cgeev_ */ - -/* Subroutine */ int cgehd2_(integer *n, integer *ilo, integer *ihi, complex * - a, integer *lda, complex *tau, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - complex q__1; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__; - static complex alpha; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *), - clarfg_(integer *, complex *, complex *, integer *, complex *), - xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGEHD2 reduces a complex general matrix A to upper Hessenberg form H - by a unitary similarity transformation: Q' * A * Q = H . - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to CGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. - 1 <= ILO <= IHI <= max(1,N). 
- - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the n by n general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the unitary matrix Q as a product of elementary - reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) COMPLEX array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) COMPLEX array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). - - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: - - on entry, on exit, - - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). 
- - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -1; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { - *info = -2; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGEHD2", &i__1); - return 0; - } - - i__1 = *ihi - 1; - for (i__ = *ilo; i__ <= i__1; ++i__) { - -/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *ihi - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[ - i__]); - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ - - i__2 = *ihi - i__; - clarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); - -/* Apply H(i)' to A(i+1:ihi,i+1:n) from the left */ - - i__2 = *ihi - i__; - i__3 = *n - i__; - r_cnjg(&q__1, &tau[i__]); - clarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &q__1, - &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); - - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = alpha.r, a[i__2].i = alpha.i; -/* L10: */ - } - - return 0; - -/* End of CGEHD2 */ - -} /* cgehd2_ */ - -/* Subroutine */ int cgehrd_(integer *n, integer *ilo, integer *ihi, complex * - a, integer *lda, complex *tau, complex *work, integer *lwork, integer - *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - complex q__1; - - /* Local variables */ - static integer i__; - static complex t[4160] /* was [65][64] */; - static integer ib; - static complex ei; - static integer nb, nh, 
nx, iws; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *); - static integer nbmin, iinfo; - extern /* Subroutine */ int cgehd2_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *), clarfb_( - char *, char *, char *, char *, integer *, integer *, integer *, - complex *, integer *, complex *, integer *, complex *, integer *, - complex *, integer *), clahrd_( - integer *, integer *, integer *, complex *, integer *, complex *, - complex *, integer *, complex *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CGEHRD reduces a complex general matrix A to upper Hessenberg form H - by a unitary similarity transformation: Q' * A * Q = H . - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to CGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the N-by-N general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the unitary matrix Q as a product of elementary - reflectors. See Further Details. 
- - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) COMPLEX array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to - zero. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). - - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: - - on entry, on exit, - - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). 
- - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; -/* Computing MIN */ - i__1 = 64, i__2 = ilaenv_(&c__1, "CGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = min(i__1,i__2); - lwkopt = *n * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - lquery = *lwork == -1; - if (*n < 0) { - *info = -1; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { - *info = -2; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGEHRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - tau[i__2].r = 0.f, tau[i__2].i = 0.f; -/* L10: */ - } - i__1 = *n - 1; - for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { - i__2 = i__; - tau[i__2].r = 0.f, tau[i__2].i = 0.f; -/* L20: */ - } - -/* Quick return if possible */ - - nh = *ihi - *ilo + 1; - if (nh <= 1) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - iws = 1; - if (nb > 1 && nb < nh) { - -/* - Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "CGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < nh) { - -/* Determine if workspace is large enough for blocked code. */ - - iws = *n * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: determine the - minimum value of NB, and reduce NB or force use of - unblocked code. 
- - Computing MAX -*/ - i__1 = 2, i__2 = ilaenv_(&c__2, "CGEHRD", " ", n, ilo, ihi, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - if (*lwork >= *n * nbmin) { - nb = *lwork / *n; - } else { - nb = 1; - } - } - } - } - ldwork = *n; - - if ((nb < nbmin) || (nb >= nh)) { - -/* Use unblocked code below */ - - i__ = *ilo; - - } else { - -/* Use blocked code */ - - i__1 = *ihi - 1 - nx; - i__2 = nb; - for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *ihi - i__; - ib = min(i__3,i__4); - -/* - Reduce columns i:i+ib-1 to Hessenberg form, returning the - matrices V and T of the block reflector H = I - V*T*V' - which performs the reduction, and also the matrix Y = A*V*T -*/ - - clahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & - c__65, &work[1], &ldwork); - -/* - Apply the block reflector H to A(1:ihi,i+ib:ihi) from the - right, computing A := A - Y * V'. V(i+ib,ib-1) must be set - to 1. -*/ - - i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; - ei.r = a[i__3].r, ei.i = a[i__3].i; - i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; - a[i__3].r = 1.f, a[i__3].i = 0.f; - i__3 = *ihi - i__ - ib + 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, & - q__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, - &c_b56, &a[(i__ + ib) * a_dim1 + 1], lda); - i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; - a[i__3].r = ei.r, a[i__3].i = ei.i; - -/* - Apply the block reflector H to A(i+1:ihi,i+ib:n) from the - left -*/ - - i__3 = *ihi - i__; - i__4 = *n - i__ - ib + 1; - clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise", & - i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, & - c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], & - ldwork); -/* L30: */ - } - } - -/* Use unblocked code to reduce the rest of the matrix */ - - cgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); - work[1].r = (real) iws, work[1].i = 0.f; - - 
return 0; - -/* End of CGEHRD */ - -} /* cgehrd_ */ - -/* Subroutine */ int cgelq2_(integer *m, integer *n, complex *a, integer *lda, - complex *tau, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, k; - static complex alpha; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *), - clarfg_(integer *, complex *, complex *, integer *, complex *), - clacgv_(integer *, complex *, integer *), xerbla_(char *, integer - *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGELQ2 computes an LQ factorization of a complex m by n matrix A: - A = L * Q. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, the elements on and below the diagonal of the array - contain the m by min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) COMPLEX array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) COMPLEX array, dimension (M) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). 
- - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in - A(i,i+1:n), and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGELQ2", &i__1); - return 0; - } - - k = min(*m,*n); - - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &tau[i__] - ); - if (i__ < *m) { - -/* Apply H(i) to A(i+1:m,i:n) from the right */ - - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - i__2 = *m - i__; - i__3 = *n - i__ + 1; - clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ - i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); - } - i__2 = i__ + i__ * a_dim1; - a[i__2].r = alpha.r, a[i__2].i = alpha.i; - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); -/* L10: */ - } - return 0; - -/* End of CGELQ2 */ - -} /* cgelq2_ */ - -/* Subroutine */ int cgelqf_(integer *m, integer *n, complex *a, integer *lda, - complex *tau, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int cgelq2_(integer *, integer *, complex 
*, - integer *, complex *, complex *, integer *), clarfb_(char *, char - *, char *, char *, integer *, integer *, integer *, complex *, - integer *, complex *, integer *, complex *, integer *, complex *, - integer *), clarft_(char *, char * - , integer *, integer *, complex *, integer *, complex *, complex * - , integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CGELQF computes an LQ factorization of a complex M-by-N matrix A: - A = L * Q. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, the elements on and below the diagonal of the array - contain the m-by-min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) COMPLEX array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,M). - For optimum performance LWORK >= M*NB, where NB is the - optimal blocksize. 
- - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in - A(i,i+1:n), and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "CGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *m * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*m) && ! lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGELQF", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - k = min(*m,*n); - if (k == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *m; - if (nb > 1 && nb < k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "CGELQF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { - -/* Determine if workspace is large enough for blocked code. 
*/ - - ldwork = *m; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "CGELQF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < k && nx < k) { - -/* Use blocked code initially */ - - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); - -/* - Compute the LQ factorization of the current block - A(i:i+ib-1,i:n) -*/ - - i__3 = *n - i__ + 1; - cgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *m) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__3 = *n - i__ + 1; - clarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H to A(i+ib:m,i:n) from the right */ - - i__3 = *m - i__ - ib + 1; - i__4 = *n - i__ + 1; - clarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, - &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + - 1], &ldwork); - } -/* L10: */ - } - } else { - i__ = 1; - } - -/* Use unblocked code to factor the last or only block. 
*/ - - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - cgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); - } - - work[1].r = (real) iws, work[1].i = 0.f; - return 0; - -/* End of CGELQF */ - -} /* cgelqf_ */ - -/* Subroutine */ int cgelsd_(integer *m, integer *n, integer *nrhs, complex * - a, integer *lda, complex *b, integer *ldb, real *s, real *rcond, - integer *rank, complex *work, integer *lwork, real *rwork, integer * - iwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; - real r__1; - complex q__1; - - /* Local variables */ - static integer ie, il, mm; - static real eps, anrm, bnrm; - static integer itau, iascl, ibscl; - static real sfmin; - static integer minmn, maxmn, itaup, itauq, mnthr, nwork; - extern /* Subroutine */ int cgebrd_(integer *, integer *, complex *, - integer *, real *, real *, complex *, complex *, complex *, - integer *, integer *), slabad_(real *, real *); - extern doublereal clange_(char *, integer *, integer *, complex *, - integer *, real *); - extern /* Subroutine */ int cgelqf_(integer *, integer *, complex *, - integer *, complex *, complex *, integer *, integer *), clalsd_( - char *, integer *, integer *, integer *, real *, real *, complex * - , integer *, real *, integer *, complex *, real *, integer *, - integer *), clascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, complex *, integer *, integer *), cgeqrf_(integer *, integer *, complex *, integer *, - complex *, complex *, integer *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex - *, integer *, complex *, integer *), claset_(char *, - integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static 
real bignum; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *), cunmbr_(char *, char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *), slaset_( - char *, integer *, integer *, real *, real *, real *, integer *), cunmlq_(char *, char *, integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *, complex *, - integer *, integer *); - static integer ldwork; - extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *); - static integer minwrk, maxwrk; - static real smlnum; - static logical lquery; - static integer nrwork, smlsiz; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - CGELSD computes the minimum-norm solution to a real linear least - squares problem: - minimize 2-norm(| b - A*x |) - using the singular value decomposition (SVD) of A. A is an M-by-N - matrix which may be rank-deficient. - - Several right hand side vectors b and solution vectors x can be - handled in a single call; they are stored as the columns of the - M-by-NRHS right hand side matrix B and the N-by-NRHS solution - matrix X. - - The problem is solved in three steps: - (1) Reduce the coefficient matrix A to bidiagonal form with - Householder tranformations, reducing the original problem - into a "bidiagonal least squares problem" (BLS) - (2) Solve the BLS using a divide and conquer approach. - (3) Apply back all the Householder tranformations to solve - the original least squares problem. 
- - The effective rank of A is determined by treating as zero those - singular values which are less than RCOND times the largest singular - value. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrices B and X. NRHS >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, A has been destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - B (input/output) COMPLEX array, dimension (LDB,NRHS) - On entry, the M-by-NRHS right hand side matrix B. - On exit, B is overwritten by the N-by-NRHS solution matrix X. - If m >= n and RANK = n, the residual sum-of-squares for - the solution in the i-th column is given by the sum of - squares of elements n+1:m in that column. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,M,N). - - S (output) REAL array, dimension (min(M,N)) - The singular values of A in decreasing order. - The condition number of A in the 2-norm = S(1)/S(min(m,n)). - - RCOND (input) REAL - RCOND is used to determine the effective rank of A. - Singular values S(i) <= RCOND*S(1) are treated as zero. - If RCOND < 0, machine precision is used instead. - - RANK (output) INTEGER - The effective rank of A, i.e., the number of singular values - which are greater than RCOND*S(1). 
- - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK must be at least 1. - The exact minimum amount of workspace needed depends on M, - N and NRHS. As long as LWORK is at least - 2 * N + N * NRHS - if M is greater than or equal to N or - 2 * M + M * NRHS - if M is less than N, the code will execute correctly. - For good performance, LWORK should generally be larger. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - - RWORK (workspace) REAL array, dimension at least - 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + - (SMLSIZ+1)**2 - if M is greater than or equal to N or - 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + - (SMLSIZ+1)**2 - if M is less than N, the code will execute correctly. - SMLSIZ is returned by ILAENV and is equal to the maximum - size of the subproblems at the bottom of the computation - tree (usually about 25), and - NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) - - IWORK (workspace) INTEGER array, dimension (LIWORK) - LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, - where MINMN = MIN( M,N ). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: the algorithm for computing the SVD failed to converge; - if INFO = i, i off-diagonal elements of an intermediate - bidiagonal form did not converge to zero. - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input arguments. 
-*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - --s; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - minmn = min(*m,*n); - maxmn = max(*m,*n); - mnthr = ilaenv_(&c__6, "CGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( - ftnlen)1); - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*ldb < max(1,maxmn)) { - *info = -7; - } - - smlsiz = ilaenv_(&c__9, "CGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - Compute workspace. - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - NB refers to the optimal block size for the immediately - following subroutine, as returned by ILAENV.) -*/ - - minwrk = 1; - if (*info == 0) { - maxwrk = 0; - mm = *m; - if (*m >= *n && *m >= mnthr) { - -/* Path 1a - overdetermined, with many more rows than columns. */ - - mm = *n; -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *nrhs * ilaenv_(&c__1, "CUNMQR", "LC", m, - nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); - maxwrk = max(i__1,i__2); - } - if (*m >= *n) { - -/* - Path 1 - overdetermined or exactly determined. 
- - Computing MAX -*/ - i__1 = maxwrk, i__2 = ((*n) << (1)) + (mm + *n) * ilaenv_(&c__1, - "CGEBRD", " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1) - ; - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *nrhs * ilaenv_(&c__1, - "CUNMBR", "QLC", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen) - 3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + (*n - 1) * ilaenv_(&c__1, - "CUNMBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * *nrhs; - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ((*n) << (1)) + mm, i__2 = ((*n) << (1)) + *n * *nrhs; - minwrk = max(i__1,i__2); - } - if (*n > *m) { - if (*n >= mnthr) { - -/* - Path 2a - underdetermined, with many more columns - than rows. -*/ - - maxwrk = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, &c_n1, - &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + ((*m) << (1)) - * ilaenv_(&c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + *nrhs * - ilaenv_(&c__1, "CUNMBR", "QLC", m, nrhs, m, &c_n1, ( - ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + (*m - 1) * - ilaenv_(&c__1, "CUNMLQ", "LC", n, nrhs, m, &c_n1, ( - ftnlen)6, (ftnlen)2); - maxwrk = max(i__1,i__2); - if (*nrhs > 1) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; - maxwrk = max(i__1,i__2); - } else { -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (1)); - maxwrk = max(i__1,i__2); - } -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + *m * *nrhs; - maxwrk = max(i__1,i__2); - } else { - -/* Path 2 - underdetermined. 
*/ - - maxwrk = ((*m) << (1)) + (*n + *m) * ilaenv_(&c__1, "CGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *nrhs * ilaenv_(&c__1, - "CUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNMBR", "PLN", n, nrhs, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * *nrhs; - maxwrk = max(i__1,i__2); - } -/* Computing MAX */ - i__1 = ((*m) << (1)) + *n, i__2 = ((*m) << (1)) + *m * *nrhs; - minwrk = max(i__1,i__2); - } - minwrk = min(minwrk,maxwrk); - r__1 = (real) maxwrk; - q__1.r = r__1, q__1.i = 0.f; - work[1].r = q__1.r, work[1].i = q__1.i; - if (*lwork < minwrk && ! lquery) { - *info = -12; - } - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGELSD", &i__1); - return 0; - } else if (lquery) { - goto L10; - } - -/* Quick return if possible. */ - - if ((*m == 0) || (*n == 0)) { - *rank = 0; - return 0; - } - -/* Get machine parameters. */ - - eps = slamch_("P"); - sfmin = slamch_("S"); - smlnum = sfmin / eps; - bignum = 1.f / smlnum; - slabad_(&smlnum, &bignum); - -/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ - - anrm = clange_("M", m, n, &a[a_offset], lda, &rwork[1]); - iascl = 0; - if (anrm > 0.f && anrm < smlnum) { - -/* Scale matrix norm up to SMLNUM */ - - clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, - info); - iascl = 1; - } else if (anrm > bignum) { - -/* Scale matrix norm down to BIGNUM. */ - - clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, - info); - iascl = 2; - } else if (anrm == 0.f) { - -/* Matrix all zero. Return zero solution. 
*/ - - i__1 = max(*m,*n); - claset_("F", &i__1, nrhs, &c_b55, &c_b55, &b[b_offset], ldb); - slaset_("F", &minmn, &c__1, &c_b320, &c_b320, &s[1], &c__1) - ; - *rank = 0; - goto L10; - } - -/* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ - - bnrm = clange_("M", m, nrhs, &b[b_offset], ldb, &rwork[1]); - ibscl = 0; - if (bnrm > 0.f && bnrm < smlnum) { - -/* Scale matrix norm up to SMLNUM. */ - - clascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, - info); - ibscl = 1; - } else if (bnrm > bignum) { - -/* Scale matrix norm down to BIGNUM. */ - - clascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, - info); - ibscl = 2; - } - -/* If M < N make sure B(M+1:N,:) = 0 */ - - if (*m < *n) { - i__1 = *n - *m; - claset_("F", &i__1, nrhs, &c_b55, &c_b55, &b[*m + 1 + b_dim1], ldb); - } - -/* Overdetermined case. */ - - if (*m >= *n) { - -/* Path 1 - overdetermined or exactly determined. */ - - mm = *m; - if (*m >= mnthr) { - -/* Path 1a - overdetermined, with many more rows than columns */ - - mm = *n; - itau = 1; - nwork = itau + *n; - -/* - Compute A=Q*R. - (RWorkspace: need N) - (CWorkspace: need N, prefer N*NB) -*/ - - i__1 = *lwork - nwork + 1; - cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, - info); - -/* - Multiply B by transpose(Q). - (RWorkspace: need N) - (CWorkspace: need NRHS, prefer NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - cunmqr_("L", "C", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ - b_offset], ldb, &work[nwork], &i__1, info); - -/* Zero out below R. */ - - if (*n > 1) { - i__1 = *n - 1; - i__2 = *n - 1; - claset_("L", &i__1, &i__2, &c_b55, &c_b55, &a[a_dim1 + 2], - lda); - } - } - - itauq = 1; - itaup = itauq + *n; - nwork = itaup + *n; - ie = 1; - nrwork = ie + *n; - -/* - Bidiagonalize R in A. 
- (RWorkspace: need N) - (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(&mm, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], & - work[itaup], &work[nwork], &i__1, info); - -/* - Multiply B by transpose of left bidiagonalizing vectors of R. - (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "C", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], - &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Solve the bidiagonal least squares problem. */ - - clalsd_("U", &smlsiz, n, nrhs, &s[1], &rwork[ie], &b[b_offset], ldb, - rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], info); - if (*info != 0) { - goto L10; - } - -/* Multiply B by right bidiagonalizing vectors of R. */ - - i__1 = *lwork - nwork + 1; - cunmbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & - b[b_offset], ldb, &work[nwork], &i__1, info); - - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = *m, i__2 = ((*m) << (1)) - 4, i__1 = max(i__1,i__2), i__1 = - max(i__1,*nrhs), i__2 = *n - *m * 3; - if (*n >= mnthr && *lwork >= ((*m) << (2)) + *m * *m + max(i__1,i__2)) - { - -/* - Path 2a - underdetermined, with many more columns than rows - and sufficient workspace for an efficient algorithm. -*/ - - ldwork = *m; -/* - Computing MAX - Computing MAX -*/ - i__3 = *m, i__4 = ((*m) << (1)) - 4, i__3 = max(i__3,i__4), i__3 = - max(i__3,*nrhs), i__4 = *n - *m * 3; - i__1 = ((*m) << (2)) + *m * *lda + max(i__3,i__4), i__2 = *m * * - lda + *m + *m * *nrhs; - if (*lwork >= max(i__1,i__2)) { - ldwork = *lda; - } - itau = 1; - nwork = *m + 1; - -/* - Compute A=L*Q. - (CWorkspace: need 2*M, prefer M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, - info); - il = nwork; - -/* Copy L to WORK(IL), zeroing out above its diagonal. 
*/ - - clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); - i__1 = *m - 1; - i__2 = *m - 1; - claset_("U", &i__1, &i__2, &c_b55, &c_b55, &work[il + ldwork], & - ldwork); - itauq = il + ldwork * *m; - itaup = itauq + *m; - nwork = itaup + *m; - ie = 1; - nrwork = ie + *m; - -/* - Bidiagonalize L in WORK(IL). - (RWorkspace: need M) - (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(m, m, &work[il], &ldwork, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__1, info); - -/* - Multiply B by transpose of left bidiagonalizing vectors of L. - (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "C", m, nrhs, m, &work[il], &ldwork, &work[ - itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Solve the bidiagonal least squares problem. */ - - clalsd_("U", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset], - ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], - info); - if (*info != 0) { - goto L10; - } - -/* Multiply B by right bidiagonalizing vectors of L. */ - - i__1 = *lwork - nwork + 1; - cunmbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ - itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Zero out below first M rows of B. */ - - i__1 = *n - *m; - claset_("F", &i__1, nrhs, &c_b55, &c_b55, &b[*m + 1 + b_dim1], - ldb); - nwork = itau + *m; - -/* - Multiply transpose(Q) by B. - (CWorkspace: need NRHS, prefer NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - cunmlq_("L", "C", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ - b_offset], ldb, &work[nwork], &i__1, info); - - } else { - -/* Path 2 - remaining underdetermined cases. */ - - itauq = 1; - itaup = itauq + *m; - nwork = itaup + *m; - ie = 1; - nrwork = ie + *m; - -/* - Bidiagonalize A. 
- (RWorkspace: need M) - (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__1, info); - -/* - Multiply B by transpose of left bidiagonalizing vectors. - (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "C", m, nrhs, n, &a[a_offset], lda, &work[itauq] - , &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Solve the bidiagonal least squares problem. */ - - clalsd_("L", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset], - ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], - info); - if (*info != 0) { - goto L10; - } - -/* Multiply B by right bidiagonalizing vectors of A. */ - - i__1 = *lwork - nwork + 1; - cunmbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] - , &b[b_offset], ldb, &work[nwork], &i__1, info); - - } - } - -/* Undo scaling. */ - - if (iascl == 1) { - clascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, - info); - slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & - minmn, info); - } else if (iascl == 2) { - clascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, - info); - slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & - minmn, info); - } - if (ibscl == 1) { - clascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, - info); - } else if (ibscl == 2) { - clascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, - info); - } - -L10: - r__1 = (real) maxwrk; - q__1.r = r__1, q__1.i = 0.f; - work[1].r = q__1.r, work[1].i = q__1.i; - return 0; - -/* End of CGELSD */ - -} /* cgelsd_ */ - -/* Subroutine */ int cgeqr2_(integer *m, integer *n, complex *a, integer *lda, - complex *tau, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - complex q__1; - - /* Builtin functions */ - void r_cnjg(complex *, 
complex *); - - /* Local variables */ - static integer i__, k; - static complex alpha; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *), - clarfg_(integer *, complex *, complex *, integer *, complex *), - xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGEQR2 computes a QR factorization of a complex m by n matrix A: - A = Q * R. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, the elements on and above the diagonal of the array - contain the min(m,n) by n upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) COMPLEX array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) COMPLEX array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(1) H(2) . . . H(k), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). 
- - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGEQR2", &i__1); - return 0; - } - - k = min(*m,*n); - - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ - - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - clarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] - , &c__1, &tau[i__]); - if (i__ < *n) { - -/* Apply H(i)' to A(i:m,i+1:n) from the left */ - - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - i__2 = *m - i__ + 1; - i__3 = *n - i__; - r_cnjg(&q__1, &tau[i__]); - clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &q__1, - &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - i__2 = i__ + i__ * a_dim1; - a[i__2].r = alpha.r, a[i__2].i = alpha.i; - } -/* L10: */ - } - return 0; - -/* End of CGEQR2 */ - -} /* cgeqr2_ */ - -/* Subroutine */ int cgeqrf_(integer *m, integer *n, complex *a, integer *lda, - complex *tau, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int cgeqr2_(integer *, integer *, complex *, - integer *, complex *, complex *, integer *), clarfb_(char *, char - *, char *, char *, integer *, integer *, integer *, complex *, - integer *, complex *, integer *, complex *, integer *, complex *, - integer *), clarft_(char *, char * - , integer *, integer *, complex *, integer *, complex *, complex * - , 
integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CGEQRF computes a QR factorization of a complex M-by-N matrix A: - A = Q * R. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, the elements on and above the diagonal of the array - contain the min(M,N)-by-N upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the unitary matrix Q as a - product of min(m,n) elementary reflectors (see Further - Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) COMPLEX array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. 
- - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(1) H(2) . . . H(k), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "CGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *n * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGEQRF", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - k = min(*m,*n); - if (k == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *n; - if (nb > 1 && nb < k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "CGEQRF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. 
-*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "CGEQRF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < k && nx < k) { - -/* Use blocked code initially */ - - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); - -/* - Compute the QR factorization of the current block - A(i:m,i:i+ib-1) -*/ - - i__3 = *m - i__ + 1; - cgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *n) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__3 = *m - i__ + 1; - clarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H' to A(i:m,i+ib:n) from the left */ - - i__3 = *m - i__ + 1; - i__4 = *n - i__ - ib + 1; - clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise" - , &i__3, &i__4, &ib, &a[i__ + i__ * a_dim1], lda, & - work[1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, - &work[ib + 1], &ldwork); - } -/* L10: */ - } - } else { - i__ = 1; - } - -/* Use unblocked code to factor the last or only block. 
*/ - - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - cgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); - } - - work[1].r = (real) iws, work[1].i = 0.f; - return 0; - -/* End of CGEQRF */ - -} /* cgeqrf_ */ - -/* Subroutine */ int cgesdd_(char *jobz, integer *m, integer *n, complex *a, - integer *lda, real *s, complex *u, integer *ldu, complex *vt, integer - *ldvt, complex *work, integer *lwork, real *rwork, integer *iwork, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2, i__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, ie, il, ir, iu, blk; - static real dum[1], eps; - static integer iru, ivt, iscl; - static real anrm; - static integer idum[1], ierr, itau, irvt; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *); - extern logical lsame_(char *, char *); - static integer chunk, minmn, wrkbl, itaup, itauq; - static logical wntqa; - static integer nwork; - extern /* Subroutine */ int clacp2_(char *, integer *, integer *, real *, - integer *, complex *, integer *); - static logical wntqn, wntqo, wntqs; - static integer mnthr1, mnthr2; - extern /* Subroutine */ int cgebrd_(integer *, integer *, complex *, - integer *, real *, real *, complex *, complex *, complex *, - integer *, integer *); - extern doublereal clange_(char *, integer *, integer *, complex *, - integer *, real *); - extern /* Subroutine */ int cgelqf_(integer *, integer *, complex *, - integer *, complex *, complex *, integer *, integer *), clacrm_( - integer *, integer *, complex *, integer *, real *, integer *, - complex *, integer *, real *), clarcm_(integer *, integer *, real - *, integer *, complex *, integer *, complex *, integer *, real *), - clascl_(char *, integer *, integer *, 
real *, real *, integer *, - integer *, complex *, integer *, integer *), sbdsdc_(char - *, char *, integer *, real *, real *, real *, integer *, real *, - integer *, real *, integer *, real *, integer *, integer *), cgeqrf_(integer *, integer *, complex *, integer - *, complex *, complex *, integer *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex - *, integer *, complex *, integer *), claset_(char *, - integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int cungbr_(char *, integer *, integer *, integer - *, complex *, integer *, complex *, complex *, integer *, integer - *); - static real bignum; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *), cunmbr_(char *, char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *), cunglq_( - integer *, integer *, integer *, complex *, integer *, complex *, - complex *, integer *, integer *); - static integer ldwrkl; - extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *, integer *); - static integer ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; - static real smlnum; - static logical wntqas, lquery; - static integer nrwork; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - CGESDD computes the singular value decomposition (SVD) of a complex - M-by-N matrix A, optionally computing the left and/or right singular - vectors, by using divide-and-conquer method. 
The SVD is written - - A = U * SIGMA * conjugate-transpose(V) - - where SIGMA is an M-by-N matrix which is zero except for its - min(m,n) diagonal elements, U is an M-by-M unitary matrix, and - V is an N-by-N unitary matrix. The diagonal elements of SIGMA - are the singular values of A; they are real and non-negative, and - are returned in descending order. The first min(m,n) columns of - U and V are the left and right singular vectors of A. - - Note that the routine returns VT = V**H, not V. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - JOBZ (input) CHARACTER*1 - Specifies options for computing all or part of the matrix U: - = 'A': all M columns of U and all N rows of V**H are - returned in the arrays U and VT; - = 'S': the first min(M,N) columns of U and the first - min(M,N) rows of V**H are returned in the arrays U - and VT; - = 'O': If M >= N, the first N columns of U are overwritten - on the array A and all rows of V**H are returned in - the array VT; - otherwise, all columns of U are returned in the - array U and the first M rows of V**H are overwritten - in the array VT; - = 'N': no columns of U or rows of V**H are computed. - - M (input) INTEGER - The number of rows of the input matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the input matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the M-by-N matrix A. 
- On exit, - if JOBZ = 'O', A is overwritten with the first N columns - of U (the left singular vectors, stored - columnwise) if M >= N; - A is overwritten with the first M rows - of V**H (the right singular vectors, stored - rowwise) otherwise. - if JOBZ .ne. 'O', the contents of A are destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - S (output) REAL array, dimension (min(M,N)) - The singular values of A, sorted so that S(i) >= S(i+1). - - U (output) COMPLEX array, dimension (LDU,UCOL) - UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; - UCOL = min(M,N) if JOBZ = 'S'. - If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M - unitary matrix U; - if JOBZ = 'S', U contains the first min(M,N) columns of U - (the left singular vectors, stored columnwise); - if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= 1; if - JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. - - VT (output) COMPLEX array, dimension (LDVT,N) - If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the - N-by-N unitary matrix V**H; - if JOBZ = 'S', VT contains the first min(M,N) rows of - V**H (the right singular vectors, stored rowwise); - if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= 1; if - JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; - if JOBZ = 'S', LDVT >= min(M,N). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= 1. - if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N). - if JOBZ = 'O', - LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). - if JOBZ = 'S' or 'A', - LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N). - For good performance, LWORK should generally be larger. 
- If LWORK < 0 but other input arguments are legal, WORK(1) - returns the optimal LWORK. - - RWORK (workspace) REAL array, dimension (LRWORK) - If JOBZ = 'N', LRWORK >= 7*min(M,N). - Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N) - - IWORK (workspace) INTEGER array, dimension (8*min(M,N)) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The updating process of SBDSDC did not converge. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --s; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - minmn = min(*m,*n); - mnthr1 = (integer) (minmn * 17.f / 9.f); - mnthr2 = (integer) (minmn * 5.f / 3.f); - wntqa = lsame_(jobz, "A"); - wntqs = lsame_(jobz, "S"); - wntqas = (wntqa) || (wntqs); - wntqo = lsame_(jobz, "O"); - wntqn = lsame_(jobz, "N"); - minwrk = 1; - maxwrk = 1; - lquery = *lwork == -1; - - if (! ((((wntqa) || (wntqs)) || (wntqo)) || (wntqn))) { - *info = -1; - } else if (*m < 0) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (((*ldu < 1) || (wntqas && *ldu < *m)) || (wntqo && *m < *n && * - ldu < *m)) { - *info = -8; - } else if ((((*ldvt < 1) || (wntqa && *ldvt < *n)) || (wntqs && *ldvt < - minmn)) || (wntqo && *m >= *n && *ldvt < *n)) { - *info = -10; - } - -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. 
- CWorkspace refers to complex workspace, and RWorkspace to - real workspace. NB refers to the optimal block size for the - immediately following subroutine, as returned by ILAENV.) -*/ - - if (*info == 0 && *m > 0 && *n > 0) { - if (*m >= *n) { - -/* - There is no complex work space needed for bidiagonal SVD - The real work space needed for bidiagonal SVD is BDSPAC, - BDSPAC = 3*N*N + 4*N -*/ - - if (*m >= mnthr1) { - if (wntqn) { - -/* Path 1 (M much larger than N, JOBZ='N') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) * - ilaenv_(&c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); - maxwrk = wrkbl; - minwrk = *n * 3; - } else if (wntqo) { - -/* Path 2 (M much larger than N, JOBZ='O') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", - " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) * - ilaenv_(&c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *m * *n + *n * *n + wrkbl; - minwrk = ((*n) << (1)) * *n + *n * 3; - } else if (wntqs) { - -/* Path 3 (M much larger than N, JOBZ='S') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", - " ", m, n, n, &c_n1, (ftnlen)6, 
(ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) * - ilaenv_(&c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *n * *n + wrkbl; - minwrk = *n * *n + *n * 3; - } else if (wntqa) { - -/* Path 4 (M much larger than N, JOBZ='A') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "CUNGQR", - " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) * - ilaenv_(&c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *n * *n + wrkbl; - minwrk = *n * *n + ((*n) << (1)) + *m; - } - } else if (*m >= mnthr2) { - -/* Path 5 (M much larger than N, but not as much as MNTHR1) */ - - maxwrk = ((*n) << (1)) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = ((*n) << (1)) + *m; - if (wntqo) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + 
*n * ilaenv_(&c__1, - "CUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - maxwrk += *m * *n; - minwrk += *n * *n; - } else if (wntqs) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - } else if (wntqa) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *m * ilaenv_(&c__1, - "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - } - } else { - -/* Path 6 (M at least N, but not much larger) */ - - maxwrk = ((*n) << (1)) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = ((*n) << (1)) + *m; - if (wntqo) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - maxwrk += *m * *n; - minwrk += *n * *n; - } else if (wntqs) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - } else if (wntqa) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, - "CUNGBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - 
maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *m * ilaenv_(&c__1, - "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - } - } - } else { - -/* - There is no complex work space needed for bidiagonal SVD - The real work space needed for bidiagonal SVD is BDSPAC, - BDSPAC = 3*M*M + 4*M -*/ - - if (*n >= mnthr1) { - if (wntqn) { - -/* Path 1t (N much larger than M, JOBZ='N') */ - - maxwrk = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + ((*m) << (1)) * - ilaenv_(&c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); - minwrk = *m * 3; - } else if (wntqo) { - -/* Path 2t (N much larger than M, JOBZ='O') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "CUNGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + ((*m) << (1)) * - ilaenv_(&c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *m * *n + *m * *m + wrkbl; - minwrk = ((*m) << (1)) * *m + *m * 3; - } else if (wntqs) { - -/* Path 3t (N much larger than M, JOBZ='S') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "CUNGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing 
MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + ((*m) << (1)) * - ilaenv_(&c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *m * *m + wrkbl; - minwrk = *m * *m + *m * 3; - } else if (wntqa) { - -/* Path 4t (N much larger than M, JOBZ='A') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "CUNGLQ", - " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + ((*m) << (1)) * - ilaenv_(&c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - wrkbl = max(i__1,i__2); - maxwrk = *m * *m + wrkbl; - minwrk = *m * *m + ((*m) << (1)) + *n; - } - } else if (*n >= mnthr2) { - -/* Path 5t (N much larger than M, but not as much as MNTHR1) */ - - maxwrk = ((*m) << (1)) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = ((*m) << (1)) + *n; - if (wntqo) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNGBR", "Q", m, m, n, 
&c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - maxwrk += *m * *n; - minwrk += *m * *m; - } else if (wntqs) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - } else if (wntqa) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *n * ilaenv_(&c__1, - "CUNGBR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) - 1); - maxwrk = max(i__1,i__2); - } - } else { - -/* Path 6t (N greater than M, but not much larger) */ - - maxwrk = ((*m) << (1)) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = ((*m) << (1)) + *n; - if (wntqo) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNMBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNMBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - maxwrk += *m * *n; - minwrk += *m * *m; - } else if (wntqs) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNGBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - } else if (wntqa) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *n * ilaenv_(&c__1, - "CUNGBR", "PRC", n, n, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ 
- i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); - } - } - } - maxwrk = max(maxwrk,minwrk); - work[1].r = (real) maxwrk, work[1].i = 0.f; - } - - if (*lwork < minwrk && ! lquery) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGESDD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if ((*m == 0) || (*n == 0)) { - if (*lwork >= 1) { - work[1].r = 1.f, work[1].i = 0.f; - } - return 0; - } - -/* Get machine constants */ - - eps = slamch_("P"); - smlnum = sqrt(slamch_("S")) / eps; - bignum = 1.f / smlnum; - -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ - - anrm = clange_("M", m, n, &a[a_offset], lda, dum); - iscl = 0; - if (anrm > 0.f && anrm < smlnum) { - iscl = 1; - clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & - ierr); - } else if (anrm > bignum) { - iscl = 1; - clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & - ierr); - } - - if (*m >= *n) { - -/* - A has at least as many rows as columns. 
If A has sufficiently - more rows than columns, first reduce using the QR - decomposition (if sufficient workspace available) -*/ - - if (*m >= mnthr1) { - - if (wntqn) { - -/* - Path 1 (M much larger than N, JOBZ='N') - No singular vectors to be computed -*/ - - itau = 1; - nwork = itau + *n; - -/* - Compute A=Q*R - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: need 0) -*/ - - i__1 = *lwork - nwork + 1; - cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Zero out below R */ - - i__1 = *n - 1; - i__2 = *n - 1; - claset_("L", &i__1, &i__2, &c_b55, &c_b55, &a[a_dim1 + 2], - lda); - ie = 1; - itauq = 1; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A - (CWorkspace: need 3*N, prefer 2*N+2*N*NB) - (RWorkspace: need N) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - nrwork = ie + *n; - -/* - Perform bidiagonal SVD, compute singular values only - (CWorkspace: 0) - (RWorkspace: need BDSPAC) -*/ - - sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - - } else if (wntqo) { - -/* - Path 2 (M much larger than N, JOBZ='O') - N left singular vectors to be overwritten on A and - N right singular vectors to be computed in VT -*/ - - iu = 1; - -/* WORK(IU) is N by N */ - - ldwrku = *n; - ir = iu + ldwrku * *n; - if (*lwork >= *m * *n + *n * *n + *n * 3) { - -/* WORK(IR) is M by N */ - - ldwrkr = *m; - } else { - ldwrkr = (*lwork - *n * *n - *n * 3) / *n; - } - itau = ir + ldwrkr * *n; - nwork = itau + *n; - -/* - Compute A=Q*R - (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Copy R to WORK( IR ), zeroing out below it */ - - clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__1 = *n - 1; - i__2 = *n - 1; - 
claset_("L", &i__1, &i__2, &c_b55, &c_b55, &work[ir + 1], & - ldwrkr); - -/* - Generate Q in A - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = 1; - itauq = itau; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in WORK(IR) - (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB) - (RWorkspace: need N) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of R in WORK(IRU) and computing right singular vectors - of R in WORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *n; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix WORK(IU) - Overwrite WORK(IU) by the left singular vectors of R - (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & - ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by the right singular vectors of R - (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - -/* - Multiply Q in A by left singular vectors of R in - WORK(IU), storing result in WORK(IR) and copying to A - (CWorkspace: need 2*N*N, prefer N*N+M*N) - (RWorkspace: 0) -*/ - - i__1 = *m; - i__2 = 
ldwrkr; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrkr); - cgemm_("N", "N", &chunk, n, n, &c_b56, &a[i__ + a_dim1], - lda, &work[iu], &ldwrku, &c_b55, &work[ir], & - ldwrkr); - clacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + - a_dim1], lda); -/* L10: */ - } - - } else if (wntqs) { - -/* - Path 3 (M much larger than N, JOBZ='S') - N left singular vectors to be computed in U and - N right singular vectors to be computed in VT -*/ - - ir = 1; - -/* WORK(IR) is N by N */ - - ldwrkr = *n; - itau = ir + ldwrkr * *n; - nwork = itau + *n; - -/* - Compute A=Q*R - (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Copy R to WORK(IR), zeroing out below it */ - - clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__2 = *n - 1; - i__1 = *n - 1; - claset_("L", &i__2, &i__1, &c_b55, &c_b55, &work[ir + 1], & - ldwrkr); - -/* - Generate Q in A - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = 1; - itauq = itau; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in WORK(IR) - (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) - (RWorkspace: need N) -*/ - - i__2 = *lwork - nwork + 1; - cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *n; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], 
&iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of R - (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of R - (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply Q in A by left singular vectors of R in - WORK(IR), storing result in U - (CWorkspace: need N*N) - (RWorkspace: 0) -*/ - - clacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); - cgemm_("N", "N", m, n, n, &c_b56, &a[a_offset], lda, &work[ir] - , &ldwrkr, &c_b55, &u[u_offset], ldu); - - } else if (wntqa) { - -/* - Path 4 (M much larger than N, JOBZ='A') - M left singular vectors to be computed in U and - N right singular vectors to be computed in VT -*/ - - iu = 1; - -/* WORK(IU) is N by N */ - - ldwrku = *n; - itau = iu + ldwrku * *n; - nwork = itau + *n; - -/* - Compute A=Q*R, copying result to U - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); - -/* - Generate Q in U - (CWorkspace: need N+M, prefer N+M*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], - &i__2, &ierr); - -/* Produce R in A, zeroing out below it */ - - i__2 = *n - 1; - i__1 = *n - 1; - claset_("L", &i__2, &i__1, &c_b55, &c_b55, &a[a_dim1 + 2], - 
lda); - ie = 1; - itauq = itau; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A - (CWorkspace: need 3*N, prefer 2*N+2*N*NB) - (RWorkspace: need N) -*/ - - i__2 = *lwork - nwork + 1; - cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - iru = ie + *n; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix WORK(IU) - Overwrite WORK(IU) by left singular vectors of R - (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); - i__2 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & - ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of R - (CWorkspace: need 3*N, prefer 2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply Q in U by left singular vectors of R in - WORK(IU), storing result in A - (CWorkspace: need N*N) - (RWorkspace: 0) -*/ - - cgemm_("N", "N", m, n, n, &c_b56, &u[u_offset], ldu, &work[iu] - , &ldwrku, &c_b55, &a[a_offset], lda); - -/* Copy left singular vectors of A from A to U */ - - clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); - - } - - } else if (*m >= mnthr2) { - -/* - MNTHR2 <= M < MNTHR1 - - Path 5 (M much larger than N, but not as 
much as MNTHR1) - Reduce to bidiagonal form without QR decomposition, use - CUNGBR and matrix multiplication to compute singular vectors -*/ - - ie = 1; - nrwork = ie + *n; - itauq = 1; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize A - (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) - (RWorkspace: need N) -*/ - - i__2 = *lwork - nwork + 1; - cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { - -/* - Compute singular values only - (Cworkspace: 0) - (Rworkspace: need BDSPAC) -*/ - - sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - } else if (wntqo) { - iu = nwork; - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Generate Q in A - (CWorkspace: need 2*N, prefer N+N*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ - nwork], &i__2, &ierr); - - if (*lwork >= *m * *n + *n * 3) { - -/* WORK( IU ) is M by N */ - - ldwrku = *m; - } else { - -/* WORK(IU) is LDWRKU by N */ - - ldwrku = (*lwork - *n * 3) / *n; - } - nwork = iu + ldwrku * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in WORK(IU), copying to VT - (Cworkspace: need 0) - (Rworkspace: need 3*N*N) -*/ - 
- clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &work[iu] - , &ldwrku, &rwork[nrwork]); - clacpy_("F", n, n, &work[iu], &ldwrku, &vt[vt_offset], ldvt); - -/* - Multiply Q in A by real matrix RWORK(IRU), storing the - result in WORK(IU), copying to A - (CWorkspace: need N*N, prefer M*N) - (Rworkspace: need 3*N*N, prefer N*N+2*M*N) -*/ - - nrwork = irvt; - i__2 = *m; - i__1 = ldwrku; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrku); - clacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], n, - &work[iu], &ldwrku, &rwork[nrwork]); - clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + - a_dim1], lda); -/* L20: */ - } - - } else if (wntqs) { - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__1, &ierr); - -/* - Copy A to U, generate Q - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cungbr_("Q", m, n, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in A, copying to VT - (Cworkspace: need 0) - (Rworkspace: need 3*N*N) -*/ - - clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ - a_offset], lda, &rwork[nrwork]); - clacpy_("F", n, n, 
&a[a_offset], lda, &vt[vt_offset], ldvt); - -/* - Multiply Q in U by real matrix RWORK(IRU), storing the - result in A, copying to U - (CWorkspace: need 0) - (Rworkspace: need N*N+2*M*N) -*/ - - nrwork = irvt; - clacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], - lda, &rwork[nrwork]); - clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); - } else { - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__1, &ierr); - -/* - Copy A to U, generate Q - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: 0) -*/ - - clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in A, copying to VT - (Cworkspace: need 0) - (Rworkspace: need 3*N*N) -*/ - - clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ - a_offset], lda, &rwork[nrwork]); - clacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - -/* - Multiply Q in U by real matrix RWORK(IRU), storing the - result in A, copying to U - (CWorkspace: 0) - (Rworkspace: need 3*N*N) -*/ - - nrwork = irvt; - clacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], - lda, &rwork[nrwork]); - clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); - } - - } else { - -/* - M 
.LT. MNTHR2 - - Path 6 (M at least N, but not much larger) - Reduce to bidiagonal form without QR decomposition - Use CUNMBR to compute singular vectors -*/ - - ie = 1; - nrwork = ie + *n; - itauq = 1; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize A - (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) - (RWorkspace: need N) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__1, &ierr); - if (wntqn) { - -/* - Compute singular values only - (Cworkspace: 0) - (Rworkspace: need BDSPAC) -*/ - - sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - } else if (wntqo) { - iu = nwork; - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - if (*lwork >= *m * *n + *n * 3) { - -/* WORK( IU ) is M by N */ - - ldwrku = *m; - } else { - -/* WORK( IU ) is LDWRKU by N */ - - ldwrku = (*lwork - *n * 3) / *n; - } - nwork = iu + ldwrku * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: need 0) -*/ - - clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - - if (*lwork >= *m * *n + *n * 3) { - -/* - Copy real matrix RWORK(IRU) to complex matrix WORK(IU) - Overwrite WORK(IU) by left singular vectors of A, copying - to A - (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB) - (Rworkspace: need 0) -*/ - - 
claset_("F", m, n, &c_b55, &c_b55, &work[iu], &ldwrku); - clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & - ierr); - clacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); - } else { - -/* - Generate Q in A - (Cworkspace: need 2*N, prefer N+N*NB) - (Rworkspace: need 0) -*/ - - i__1 = *lwork - nwork + 1; - cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & - work[nwork], &i__1, &ierr); - -/* - Multiply Q in A by real matrix RWORK(IRU), storing the - result in WORK(IU), copying to A - (CWorkspace: need N*N, prefer M*N) - (Rworkspace: need 3*N*N, prefer N*N+2*M*N) -*/ - - nrwork = irvt; - i__1 = *m; - i__2 = ldwrku; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrku); - clacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], - n, &work[iu], &ldwrku, &rwork[nrwork]); - clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + - a_dim1], lda); -/* L30: */ - } - } - - } else if (wntqs) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (CWorkspace: need 3*N, prefer 2*N+N*NB) - (RWorkspace: 0) -*/ - - claset_("F", m, n, &c_b55, &c_b55, &u[u_offset], ldu); - clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - -/* - 
Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (CWorkspace: need 3*N, prefer 2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - } else { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = nrwork; - irvt = iru + *n * *n; - nrwork = irvt + *n * *n; - sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & - rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* Set the right corner of U to identity matrix */ - - claset_("F", m, m, &c_b55, &c_b55, &u[u_offset], ldu); - i__2 = *m - *n; - i__1 = *m - *n; - claset_("F", &i__2, &i__1, &c_b55, &c_b56, &u[*n + 1 + (*n + - 1) * u_dim1], ldu); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (CWorkspace: need 2*N+M, prefer 2*N+M*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (CWorkspace: need 3*N, prefer 2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - } - - } - - } else { - -/* - A has more columns than rows. 
If A has sufficiently more - columns than rows, first reduce using the LQ decomposition - (if sufficient workspace available) -*/ - - if (*n >= mnthr1) { - - if (wntqn) { - -/* - Path 1t (N much larger than M, JOBZ='N') - No singular vectors to be computed -*/ - - itau = 1; - nwork = itau + *m; - -/* - Compute A=L*Q - (CWorkspace: need 2*M, prefer M+M*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Zero out above L */ - - i__2 = *m - 1; - i__1 = *m - 1; - claset_("U", &i__2, &i__1, &c_b55, &c_b55, &a[((a_dim1) << (1) - ) + 1], lda); - ie = 1; - itauq = 1; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in A - (CWorkspace: need 3*M, prefer 2*M+2*M*NB) - (RWorkspace: need M) -*/ - - i__2 = *lwork - nwork + 1; - cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - nrwork = ie + *m; - -/* - Perform bidiagonal SVD, compute singular values only - (CWorkspace: 0) - (RWorkspace: need BDSPAC) -*/ - - sbdsdc_("U", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - - } else if (wntqo) { - -/* - Path 2t (N much larger than M, JOBZ='O') - M right singular vectors to be overwritten on A and - M left singular vectors to be computed in U -*/ - - ivt = 1; - ldwkvt = *m; - -/* WORK(IVT) is M by M */ - - il = ivt + ldwkvt * *m; - if (*lwork >= *m * *n + *m * *m + *m * 3) { - -/* WORK(IL) M by N */ - - ldwrkl = *m; - chunk = *n; - } else { - -/* WORK(IL) is M by CHUNK */ - - ldwrkl = *m; - chunk = (*lwork - *m * *m - *m * 3) / *m; - } - itau = il + ldwrkl * chunk; - nwork = itau + *m; - -/* - Compute A=L*Q - (CWorkspace: need 2*M, prefer M+M*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Copy L to WORK(IL), zeroing about above it */ - - clacpy_("L", m, m, &a[a_offset], 
lda, &work[il], &ldwrkl); - i__2 = *m - 1; - i__1 = *m - 1; - claset_("U", &i__2, &i__1, &c_b55, &c_b55, &work[il + ldwrkl], - &ldwrkl); - -/* - Generate Q in A - (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) - (RWorkspace: 0) -*/ - - i__2 = *lwork - nwork + 1; - cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = 1; - itauq = itau; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IL) - (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) - (RWorkspace: need M) -*/ - - i__2 = *lwork - nwork + 1; - cgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *m; - irvt = iru + *m * *m; - nrwork = irvt + *m * *m; - sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix WORK(IU) - Overwrite WORK(IU) by the left singular vectors of L - (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) - Overwrite WORK(IVT) by the right singular vectors of L - (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); - i__2 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply right singular vectors of L in WORK(IL) by Q - in A, storing result in 
WORK(IL) and copying to A - (CWorkspace: need 2*M*M, prefer M*M+M*N)) - (RWorkspace: 0) -*/ - - i__2 = *n; - i__1 = chunk; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - cgemm_("N", "N", m, &blk, m, &c_b56, &work[ivt], m, &a[ - i__ * a_dim1 + 1], lda, &c_b55, &work[il], & - ldwrkl); - clacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 - + 1], lda); -/* L40: */ - } - - } else if (wntqs) { - -/* - Path 3t (N much larger than M, JOBZ='S') - M right singular vectors to be computed in VT and - M left singular vectors to be computed in U -*/ - - il = 1; - -/* WORK(IL) is M by M */ - - ldwrkl = *m; - itau = il + ldwrkl * *m; - nwork = itau + *m; - -/* - Compute A=L*Q - (CWorkspace: need 2*M, prefer M+M*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Copy L to WORK(IL), zeroing out above it */ - - clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); - i__1 = *m - 1; - i__2 = *m - 1; - claset_("U", &i__1, &i__2, &c_b55, &c_b55, &work[il + ldwrkl], - &ldwrkl); - -/* - Generate Q in A - (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = 1; - itauq = itau; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IL) - (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) - (RWorkspace: need M) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *m; - irvt = iru + *m * *m; - nrwork = irvt + 
*m * *m; - sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of L - (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by left singular vectors of L - (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - -/* - Copy VT to WORK(IL), multiply right singular vectors of L - in WORK(IL) by Q in A, storing result in VT - (CWorkspace: need M*M) - (RWorkspace: 0) -*/ - - clacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); - cgemm_("N", "N", m, n, m, &c_b56, &work[il], &ldwrkl, &a[ - a_offset], lda, &c_b55, &vt[vt_offset], ldvt); - - } else if (wntqa) { - -/* - Path 9t (N much larger than M, JOBZ='A') - N right singular vectors to be computed in VT and - M left singular vectors to be computed in U -*/ - - ivt = 1; - -/* WORK(IVT) is M by M */ - - ldwkvt = *m; - itau = ivt + ldwkvt * *m; - nwork = itau + *m; - -/* - Compute A=L*Q, copying result to VT - (CWorkspace: need 2*M, prefer M+M*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - -/* - Generate Q in VT - (CWorkspace: need M+N, prefer M+N*NB) - (RWorkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ - nwork], &i__1, 
&ierr); - -/* Produce L in A, zeroing out above it */ - - i__1 = *m - 1; - i__2 = *m - 1; - claset_("U", &i__1, &i__2, &c_b55, &c_b55, &a[((a_dim1) << (1) - ) + 1], lda); - ie = 1; - itauq = itau; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in A - (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) - (RWorkspace: need M) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - iru = ie + *m; - irvt = iru + *m * *m; - nrwork = irvt + *m * *m; - sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of L - (CWorkspace: need 3*M, prefer 2*M+M*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) - Overwrite WORK(IVT) by right singular vectors of L - (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) - (RWorkspace: 0) -*/ - - clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); - i__1 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", m, m, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__1, & - ierr); - -/* - Multiply right singular vectors of L in WORK(IVT) by - Q in VT, storing result in A - (CWorkspace: need M*M) - (RWorkspace: 0) -*/ - - cgemm_("N", "N", m, n, m, &c_b56, &work[ivt], &ldwkvt, &vt[ - vt_offset], ldvt, &c_b55, &a[a_offset], lda); - -/* Copy right singular vectors of A from A to VT */ 
- - clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - - } - - } else if (*n >= mnthr2) { - -/* - MNTHR2 <= N < MNTHR1 - - Path 5t (N much larger than M, but not as much as MNTHR1) - Reduce to bidiagonal form without QR decomposition, use - CUNGBR and matrix multiplication to compute singular vectors -*/ - - - ie = 1; - nrwork = ie + *m; - itauq = 1; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize A - (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) - (RWorkspace: M) -*/ - - i__1 = *lwork - nwork + 1; - cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__1, &ierr); - - if (wntqn) { - -/* - Compute singular values only - (Cworkspace: 0) - (Rworkspace: need BDSPAC) -*/ - - sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - } else if (wntqo) { - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - ivt = nwork; - -/* - Copy A to U, generate Q - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__1, &ierr); - -/* - Generate P**H in A - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - i__1 = *lwork - nwork + 1; - cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ - nwork], &i__1, &ierr); - - ldwkvt = *m; - if (*lwork >= *m * *n + *m * 3) { - -/* WORK( IVT ) is M by N */ - - nwork = ivt + ldwkvt * *n; - chunk = *n; - } else { - -/* WORK( IVT ) is M by CHUNK */ - - chunk = (*lwork - *m * 3) / *m; - nwork = ivt + ldwkvt * chunk; - } - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - 
rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply Q in U by real matrix RWORK(IRVT) - storing the result in WORK(IVT), copying to U - (Cworkspace: need 0) - (Rworkspace: need 2*M*M) -*/ - - clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &work[ivt], & - ldwkvt, &rwork[nrwork]); - clacpy_("F", m, m, &work[ivt], &ldwkvt, &u[u_offset], ldu); - -/* - Multiply RWORK(IRVT) by P**H in A, storing the - result in WORK(IVT), copying to A - (CWorkspace: need M*M, prefer M*N) - (Rworkspace: need 2*M*M, prefer 2*M*N) -*/ - - nrwork = iru; - i__1 = *n; - i__2 = chunk; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - clarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1], - lda, &work[ivt], &ldwkvt, &rwork[nrwork]); - clacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * - a_dim1 + 1], lda); -/* L50: */ - } - } else if (wntqs) { - -/* - Copy A to U, generate Q - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__2, &ierr); - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cungbr_("P", m, n, m, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply Q in U by real matrix RWORK(IRU), storing the - 
result in A, copying to U - (CWorkspace: need 0) - (Rworkspace: need 3*M*M) -*/ - - clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], - lda, &rwork[nrwork]); - clacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in A, copying to VT - (Cworkspace: need 0) - (Rworkspace: need M*M+2*M*N) -*/ - - nrwork = iru; - clarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ - a_offset], lda, &rwork[nrwork]); - clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - } else { - -/* - Copy A to U, generate Q - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ - nwork], &i__2, &ierr); - -/* - Copy A to VT, generate P**H - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: 0) -*/ - - clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - i__2 = *lwork - nwork + 1; - cungbr_("P", n, n, m, &vt[vt_offset], ldvt, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Multiply Q in U by real matrix RWORK(IRU), storing the - result in A, copying to U - (CWorkspace: need 0) - (Rworkspace: need 3*M*M) -*/ - - clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], - lda, &rwork[nrwork]); - clacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); - -/* - Multiply real matrix RWORK(IRVT) by P**H in VT, - storing the result in A, copying to VT - (Cworkspace: need 0) - (Rworkspace: need 
M*M+2*M*N) -*/ - - clarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ - a_offset], lda, &rwork[nrwork]); - clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - } - - } else { - -/* - N .LT. MNTHR2 - - Path 6t (N greater than M, but not much larger) - Reduce to bidiagonal form without LQ decomposition - Use CUNMBR to compute singular vectors -*/ - - ie = 1; - nrwork = ie + *m; - itauq = 1; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize A - (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) - (RWorkspace: M) -*/ - - i__2 = *lwork - nwork + 1; - cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], - &work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { - -/* - Compute singular values only - (Cworkspace: 0) - (Rworkspace: need BDSPAC) -*/ - - sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & - c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - } else if (wntqo) { - ldwkvt = *m; - ivt = nwork; - if (*lwork >= *m * *n + *m * 3) { - -/* WORK( IVT ) is M by N */ - - claset_("F", m, n, &c_b55, &c_b55, &work[ivt], &ldwkvt); - nwork = ivt + ldwkvt * *n; - } else { - -/* WORK( IVT ) is M by CHUNK */ - - chunk = (*lwork - *m * 3) / *m; - nwork = ivt + ldwkvt * chunk; - } - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: need 0) -*/ - - clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__2 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], 
ldu, &work[nwork], &i__2, &ierr); - - if (*lwork >= *m * *n + *m * 3) { - -/* - Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) - Overwrite WORK(IVT) by right singular vectors of A, - copying to A - (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB) - (Rworkspace: need 0) -*/ - - clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); - i__2 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, - &ierr); - clacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); - } else { - -/* - Generate P**H in A - (Cworkspace: need 2*M, prefer M+M*NB) - (Rworkspace: need 0) -*/ - - i__2 = *lwork - nwork + 1; - cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Multiply Q in A by real matrix RWORK(IRU), storing the - result in WORK(IU), copying to A - (CWorkspace: need M*M, prefer M*N) - (Rworkspace: need 3*M*M, prefer M*M+2*M*N) -*/ - - nrwork = iru; - i__2 = *n; - i__1 = chunk; - for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - clarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1] - , lda, &work[ivt], &ldwkvt, &rwork[nrwork]); - clacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * - a_dim1 + 1], lda); -/* L60: */ - } - } - } else if (wntqs) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (CWorkspace: need 3*M, prefer 2*M+M*NB) - (RWorkspace: M*M) -*/ - - clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (CWorkspace: need 3*M, prefer 2*M+M*NB) - (RWorkspace: M*M) -*/ - - claset_("F", m, n, &c_b55, &c_b55, &vt[vt_offset], ldvt); - clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } else { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in RWORK(IRU) and computing right - singular vectors of bidiagonal matrix in RWORK(IRVT) - (CWorkspace: need 0) - (RWorkspace: need BDSPAC) -*/ - - irvt = nrwork; - iru = irvt + *m * *m; - nrwork = iru + *m * *m; - - sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & - rwork[irvt], m, dum, idum, &rwork[nrwork], 
&iwork[1], - info); - -/* - Copy real matrix RWORK(IRU) to complex matrix U - Overwrite U by left singular vectors of A - (CWorkspace: need 3*M, prefer 2*M+M*NB) - (RWorkspace: M*M) -*/ - - clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); - i__1 = *lwork - nwork + 1; - cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - -/* Set the right corner of VT to identity matrix */ - - i__1 = *n - *m; - i__2 = *n - *m; - claset_("F", &i__1, &i__2, &c_b55, &c_b56, &vt[*m + 1 + (*m + - 1) * vt_dim1], ldvt); - -/* - Copy real matrix RWORK(IRVT) to complex matrix VT - Overwrite VT by right singular vectors of A - (CWorkspace: need 2*M+N, prefer 2*M+N*NB) - (RWorkspace: M*M) -*/ - - claset_("F", n, n, &c_b55, &c_b55, &vt[vt_offset], ldvt); - clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); - i__1 = *lwork - nwork + 1; - cunmbr_("P", "R", "C", n, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } - - } - - } - -/* Undo scaling if necessary */ - - if (iscl == 1) { - if (anrm > bignum) { - slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); - } - if (anrm < smlnum) { - slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); - } - } - -/* Return optimal workspace in WORK(1) */ - - work[1].r = (real) maxwrk, work[1].i = 0.f; - - return 0; - -/* End of CGESDD */ - -} /* cgesdd_ */ - -/* Subroutine */ int cgesv_(integer *n, integer *nrhs, complex *a, integer * - lda, integer *ipiv, complex *b, integer *ldb, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern /* Subroutine */ int cgetrf_(integer *, integer *, complex *, - integer *, integer *, integer *), xerbla_(char *, integer *), cgetrs_(char *, integer *, integer *, complex *, integer - *, integer *, complex *, integer *, integer *); - - -/* - -- LAPACK driver 
routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - CGESV computes the solution to a complex system of linear equations - A * X = B, - where A is an N-by-N matrix and X and B are N-by-NRHS matrices. - - The LU decomposition with partial pivoting and row interchanges is - used to factor A as - A = P * L * U, - where P is a permutation matrix, L is unit lower triangular, and U is - upper triangular. The factored form of A is then used to solve the - system of equations A * X = B. - - Arguments - ========= - - N (input) INTEGER - The number of linear equations, i.e., the order of the - matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the N-by-N coefficient matrix A. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - IPIV (output) INTEGER array, dimension (N) - The pivot indices that define the permutation matrix P; - row i of the matrix was interchanged with row IPIV(i). - - B (input/output) COMPLEX array, dimension (LDB,NRHS) - On entry, the N-by-NRHS matrix of right hand side matrix B. - On exit, if INFO = 0, the N-by-NRHS solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, so the solution could not be computed. - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -1; - } else if (*nrhs < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGESV ", &i__1); - return 0; - } - -/* Compute the LU factorization of A. */ - - cgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); - if (*info == 0) { - -/* Solve the system A*X = B, overwriting B with X. */ - - cgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ - b_offset], ldb, info); - } - return 0; - -/* End of CGESV */ - -} /* cgesv_ */ - -/* Subroutine */ int cgetf2_(integer *m, integer *n, complex *a, integer *lda, - integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - complex q__1; - - /* Builtin functions */ - void c_div(complex *, complex *, complex *); - - /* Local variables */ - static integer j, jp; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), cgeru_(integer *, integer *, complex *, complex *, - integer *, complex *, integer *, complex *, integer *), cswap_( - integer *, complex *, integer *, complex *, integer *); - extern integer icamax_(integer *, complex *, integer *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGETF2 computes an LU factorization of a general m-by-n matrix A - using partial pivoting with row interchanges. 
- - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). - - This is the right-looking Level 2 BLAS version of the algorithm. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the m by n matrix to be factored. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, U(k,k) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGETF2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if ((*m == 0) || (*n == 0)) { - return 0; - } - - i__1 = min(*m,*n); - for (j = 1; j <= i__1; ++j) { - -/* Find pivot and test for singularity. 
*/ - - i__2 = *m - j + 1; - jp = j - 1 + icamax_(&i__2, &a[j + j * a_dim1], &c__1); - ipiv[j] = jp; - i__2 = jp + j * a_dim1; - if ((a[i__2].r != 0.f) || (a[i__2].i != 0.f)) { - -/* Apply the interchange to columns 1:N. */ - - if (jp != j) { - cswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); - } - -/* Compute elements J+1:M of J-th column. */ - - if (j < *m) { - i__2 = *m - j; - c_div(&q__1, &c_b56, &a[j + j * a_dim1]); - cscal_(&i__2, &q__1, &a[j + 1 + j * a_dim1], &c__1); - } - - } else if (*info == 0) { - - *info = j; - } - - if (j < min(*m,*n)) { - -/* Update trailing submatrix. */ - - i__2 = *m - j; - i__3 = *n - j; - q__1.r = -1.f, q__1.i = -0.f; - cgeru_(&i__2, &i__3, &q__1, &a[j + 1 + j * a_dim1], &c__1, &a[j + - (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda) - ; - } -/* L10: */ - } - return 0; - -/* End of CGETF2 */ - -} /* cgetf2_ */ - -/* Subroutine */ int cgetrf_(integer *m, integer *n, complex *a, integer *lda, - integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1; - - /* Local variables */ - static integer i__, j, jb, nb; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *); - static integer iinfo; - extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *), cgetf2_(integer *, - integer *, complex *, integer *, integer *, integer *), xerbla_( - char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int claswp_(integer *, complex *, integer *, - integer *, integer *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGETRF computes an LU factorization of a general M-by-N matrix A - using partial pivoting with row interchanges. - - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). - - This is the right-looking Level 3 BLAS version of the algorithm. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the M-by-N matrix to be factored. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGETRF", &i__1); - return 0; - } - -/* Quick return if possible */ - - if ((*m == 0) || (*n == 0)) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "CGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - if ((nb <= 1) || (nb >= min(*m,*n))) { - -/* Use unblocked code. */ - - cgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); - } else { - -/* Use blocked code. */ - - i__1 = min(*m,*n); - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { -/* Computing MIN */ - i__3 = min(*m,*n) - j + 1; - jb = min(i__3,nb); - -/* - Factor diagonal and subdiagonal blocks and test for exact - singularity. -*/ - - i__3 = *m - j + 1; - cgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); - -/* Adjust INFO and the pivot indices. */ - - if (*info == 0 && iinfo > 0) { - *info = iinfo + j - 1; - } -/* Computing MIN */ - i__4 = *m, i__5 = j + jb - 1; - i__3 = min(i__4,i__5); - for (i__ = j; i__ <= i__3; ++i__) { - ipiv[i__] = j - 1 + ipiv[i__]; -/* L10: */ - } - -/* Apply interchanges to columns 1:J-1. */ - - i__3 = j - 1; - i__4 = j + jb - 1; - claswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); - - if (j + jb <= *n) { - -/* Apply interchanges to columns J+JB:N. */ - - i__3 = *n - j - jb + 1; - i__4 = j + jb - 1; - claswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & - ipiv[1], &c__1); - -/* Compute block row of U. */ - - i__3 = *n - j - jb + 1; - ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & - c_b56, &a[j + j * a_dim1], lda, &a[j + (j + jb) * - a_dim1], lda); - if (j + jb <= *m) { - -/* Update trailing submatrix. 
*/ - - i__3 = *m - j - jb + 1; - i__4 = *n - j - jb + 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, - &q__1, &a[j + jb + j * a_dim1], lda, &a[j + (j + - jb) * a_dim1], lda, &c_b56, &a[j + jb + (j + jb) * - a_dim1], lda); - } - } -/* L20: */ - } - } - return 0; - -/* End of CGETRF */ - -} /* cgetrf_ */ - -/* Subroutine */ int cgetrs_(char *trans, integer *n, integer *nrhs, complex * - a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * - info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *), xerbla_(char *, - integer *), claswp_(integer *, complex *, integer *, - integer *, integer *, integer *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CGETRS solves a system of linear equations - A * X = B, A**T * X = B, or A**H * X = B - with a general N-by-N matrix A using the LU factorization computed - by CGETRF. - - Arguments - ========= - - TRANS (input) CHARACTER*1 - Specifies the form of the system of equations: - = 'N': A * X = B (No transpose) - = 'T': A**T * X = B (Transpose) - = 'C': A**H * X = B (Conjugate transpose) - - N (input) INTEGER - The order of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) COMPLEX array, dimension (LDA,N) - The factors L and U from the factorization A = P*L*U - as computed by CGETRF. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). 
- - IPIV (input) INTEGER array, dimension (N) - The pivot indices from CGETRF; for 1<=i<=N, row i of the - matrix was interchanged with row IPIV(i). - - B (input/output) COMPLEX array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - notran = lsame_(trans, "N"); - if (! notran && ! lsame_(trans, "T") && ! lsame_( - trans, "C")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CGETRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if ((*n == 0) || (*nrhs == 0)) { - return 0; - } - - if (notran) { - -/* - Solve A * X = B. - - Apply row interchanges to the right hand sides. -*/ - - claswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); - -/* Solve L*X = B, overwriting B with X. */ - - ctrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b56, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. */ - - ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b56, & - a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A**T * X = B or A**H * X = B. - - Solve U'*X = B, overwriting B with X. -*/ - - ctrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b56, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. 
*/ - - ctrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b56, &a[a_offset], - lda, &b[b_offset], ldb); - -/* Apply row interchanges to the solution vectors. */ - - claswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); - } - - return 0; - -/* End of CGETRS */ - -} /* cgetrs_ */ - -/* Subroutine */ int cheevd_(char *jobz, char *uplo, integer *n, complex *a, - integer *lda, real *w, complex *work, integer *lwork, real *rwork, - integer *lrwork, integer *iwork, integer *liwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - real r__1, r__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real eps; - static integer inde; - static real anrm; - static integer imax; - static real rmin, rmax; - static integer lopt; - static real sigma; - extern logical lsame_(char *, char *); - static integer iinfo; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static integer lwmin, liopt; - static logical lower; - static integer llrwk, lropt; - static logical wantz; - static integer indwk2, llwrk2; - extern doublereal clanhe_(char *, char *, integer *, complex *, integer *, - real *); - static integer iscale; - extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *, - integer *, complex *, integer *, real *, integer *, integer *, - integer *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int chetrd_(char *, integer *, complex *, integer - *, real *, real *, complex *, complex *, integer *, integer *), clacpy_(char *, integer *, integer *, complex *, integer - *, complex *, integer *); - static real safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - static real bignum; - static integer indtau, indrwk, indwrk, liwmin; - extern /* Subroutine */ int ssterf_(integer *, real *, 
real *, integer *); - static integer lrwmin; - extern /* Subroutine */ int cunmtr_(char *, char *, char *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *); - static integer llwork; - static real smlnum; - static logical lquery; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CHEEVD computes all eigenvalues and, optionally, eigenvectors of a - complex Hermitian matrix A. If eigenvectors are desired, it uses a - divide and conquer algorithm. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - JOBZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only; - = 'V': Compute eigenvalues and eigenvectors. - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA, N) - On entry, the Hermitian matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of A contains the - upper triangular part of the matrix A. If UPLO = 'L', - the leading N-by-N lower triangular part of A contains - the lower triangular part of the matrix A. - On exit, if JOBZ = 'V', then if INFO = 0, A contains the - orthonormal eigenvectors of the matrix A. - If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') - or the upper triangle (if UPLO='U') of A, including the - diagonal, is destroyed. 
- - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - W (output) REAL array, dimension (N) - If INFO = 0, the eigenvalues in ascending order. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. - If N <= 1, LWORK must be at least 1. - If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. - If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - RWORK (workspace/output) REAL array, - dimension (LRWORK) - On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. - - LRWORK (input) INTEGER - The dimension of the array RWORK. - If N <= 1, LRWORK must be at least 1. - If JOBZ = 'N' and N > 1, LRWORK must be at least N. - If JOBZ = 'V' and N > 1, LRWORK must be at least - 1 + 5*N + 2*N**2. - - If LRWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the RWORK array, - returns this value as the first entry of the RWORK array, and - no error message related to LRWORK is issued by XERBLA. - - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - - LIWORK (input) INTEGER - The dimension of the array IWORK. - If N <= 1, LIWORK must be at least 1. - If JOBZ = 'N' and N > 1, LIWORK must be at least 1. - If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. - - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. 
- - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the algorithm failed to converge; i - off-diagonal elements of an intermediate tridiagonal - form did not converge to zero. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --w; - --work; - --rwork; - --iwork; - - /* Function Body */ - wantz = lsame_(jobz, "V"); - lower = lsame_(uplo, "L"); - lquery = ((*lwork == -1) || (*lrwork == -1)) || (*liwork == -1); - - *info = 0; - if (*n <= 1) { - lwmin = 1; - lrwmin = 1; - liwmin = 1; - lopt = lwmin; - lropt = lrwmin; - liopt = liwmin; - } else { - if (wantz) { - lwmin = ((*n) << (1)) + *n * *n; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = *n * 5 + 1 + ((i__1 * i__1) << (1)); - liwmin = *n * 5 + 3; - } else { - lwmin = *n + 1; - lrwmin = *n; - liwmin = 1; - } - lopt = lwmin; - lropt = lrwmin; - liopt = liwmin; - } - if (! ((wantz) || (lsame_(jobz, "N")))) { - *info = -1; - } else if (! ((lower) || (lsame_(uplo, "U")))) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*lrwork < lrwmin && ! lquery) { - *info = -10; - } else if (*liwork < liwmin && ! 
lquery) { - *info = -12; - } - - if (*info == 0) { - work[1].r = (real) lopt, work[1].i = 0.f; - rwork[1] = (real) lropt; - iwork[1] = liopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CHEEVD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (*n == 1) { - i__1 = a_dim1 + 1; - w[1] = a[i__1].r; - if (wantz) { - i__1 = a_dim1 + 1; - a[i__1].r = 1.f, a[i__1].i = 0.f; - } - return 0; - } - -/* Get machine constants. */ - - safmin = slamch_("Safe minimum"); - eps = slamch_("Precision"); - smlnum = safmin / eps; - bignum = 1.f / smlnum; - rmin = sqrt(smlnum); - rmax = sqrt(bignum); - -/* Scale matrix to allowable range, if necessary. */ - - anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); - iscale = 0; - if (anrm > 0.f && anrm < rmin) { - iscale = 1; - sigma = rmin / anrm; - } else if (anrm > rmax) { - iscale = 1; - sigma = rmax / anrm; - } - if (iscale == 1) { - clascl_(uplo, &c__0, &c__0, &c_b1011, &sigma, n, n, &a[a_offset], lda, - info); - } - -/* Call CHETRD to reduce Hermitian matrix to tridiagonal form. */ - - inde = 1; - indtau = 1; - indwrk = indtau + *n; - indrwk = inde + *n; - indwk2 = indwrk + *n * *n; - llwork = *lwork - indwrk + 1; - llwrk2 = *lwork - indwk2 + 1; - llrwk = *lrwork - indrwk + 1; - chetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & - work[indwrk], &llwork, &iinfo); -/* Computing MAX */ - i__1 = indwrk; - r__1 = (real) lopt, r__2 = (real) (*n) + work[i__1].r; - lopt = dmax(r__1,r__2); - -/* - For eigenvalues only, call SSTERF. For eigenvectors, first call - CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the - tridiagonal matrix, then call CUNMTR to multiply it to the - Householder transformations represented as Householder vectors in - A. -*/ - - if (! 
wantz) { - ssterf_(n, &w[1], &rwork[inde], info); - } else { - cstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], - &llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); - cunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ - indwrk], n, &work[indwk2], &llwrk2, &iinfo); - clacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); -/* - Computing MAX - Computing 2nd power -*/ - i__3 = *n; - i__4 = indwk2; - i__1 = lopt, i__2 = *n + i__3 * i__3 + (integer) work[i__4].r; - lopt = max(i__1,i__2); - } - -/* If matrix was scaled, then rescale eigenvalues appropriately. */ - - if (iscale == 1) { - if (*info == 0) { - imax = *n; - } else { - imax = *info - 1; - } - r__1 = 1.f / sigma; - sscal_(&imax, &r__1, &w[1], &c__1); - } - - work[1].r = (real) lopt, work[1].i = 0.f; - rwork[1] = (real) lropt; - iwork[1] = liopt; - - return 0; - -/* End of CHEEVD */ - -} /* cheevd_ */ - -/* Subroutine */ int chetd2_(char *uplo, integer *n, complex *a, integer *lda, - real *d__, real *e, complex *tau, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1; - complex q__1, q__2, q__3, q__4; - - /* Local variables */ - static integer i__; - static complex taui; - extern /* Subroutine */ int cher2_(char *, integer *, complex *, complex * - , integer *, complex *, integer *, complex *, integer *); - static complex alpha; - extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer - *, complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int chemv_(char *, integer *, complex *, complex * - , integer *, complex *, integer *, complex *, complex *, integer * - ), caxpy_(integer *, complex *, complex *, integer *, - complex *, integer *); - static logical upper; - extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, - integer *, complex *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. 
of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - CHETD2 reduces a complex Hermitian matrix A to real symmetric - tridiagonal form T by a unitary similarity transformation: - Q' * A * Q = T. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - Hermitian matrix A is stored: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - n-by-n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n-by-n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit, if UPLO = 'U', the diagonal and first superdiagonal - of A are overwritten by the corresponding elements of the - tridiagonal matrix T, and the elements above the first - superdiagonal, with the array TAU, represent the unitary - matrix Q as a product of elementary reflectors; if UPLO - = 'L', the diagonal and first subdiagonal of A are over- - written by the corresponding elements of the tridiagonal - matrix T, and the elements below the first subdiagonal, with - the array TAU, represent the unitary matrix Q as a product - of elementary reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - D (output) REAL array, dimension (N) - The diagonal elements of the tridiagonal matrix T: - D(i) = A(i,i). - - E (output) REAL array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix T: - E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. 
- - TAU (output) COMPLEX array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n-1) . . . H(2) H(1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in - A(1:i-1,i+1), and tau in TAU(i). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(n-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), - and tau in TAU(i). - - The contents of A on exit are illustrated by the following examples - with n = 5: - - if UPLO = 'U': if UPLO = 'L': - - ( d e v2 v3 v4 ) ( d ) - ( d e v3 v4 ) ( e d ) - ( d e v4 ) ( v1 e d ) - ( d e ) ( v1 v2 e d ) - ( d ) ( v1 v2 v3 e d ) - - where d and e denote diagonal and off-diagonal elements of T, and vi - denotes an element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tau; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! 
lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CHETD2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - return 0; - } - - if (upper) { - -/* Reduce the upper triangle of A */ - - i__1 = *n + *n * a_dim1; - i__2 = *n + *n * a_dim1; - r__1 = a[i__2].r; - a[i__1].r = r__1, a[i__1].i = 0.f; - for (i__ = *n - 1; i__ >= 1; --i__) { - -/* - Generate elementary reflector H(i) = I - tau * v * v' - to annihilate A(1:i-1,i+1) -*/ - - i__1 = i__ + (i__ + 1) * a_dim1; - alpha.r = a[i__1].r, alpha.i = a[i__1].i; - clarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); - i__1 = i__; - e[i__1] = alpha.r; - - if ((taui.r != 0.f) || (taui.i != 0.f)) { - -/* Apply H(i) from both sides to A(1:i,1:i) */ - - i__1 = i__ + (i__ + 1) * a_dim1; - a[i__1].r = 1.f, a[i__1].i = 0.f; - -/* Compute x := tau * A * v storing x in TAU(1:i) */ - - chemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * - a_dim1 + 1], &c__1, &c_b55, &tau[1], &c__1) - ; - -/* Compute w := x - 1/2 * tau * (x'*v) * v */ - - q__3.r = -.5f, q__3.i = -0.f; - q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * - taui.i + q__3.i * taui.r; - cdotc_(&q__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1] - , &c__1); - q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * - q__4.i + q__2.i * q__4.r; - alpha.r = q__1.r, alpha.i = q__1.i; - caxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ - 1], &c__1); - -/* - Apply the transformation as a rank-2 update: - A := A - v * w' - w * v' -*/ - - q__1.r = -1.f, q__1.i = -0.f; - cher2_(uplo, &i__, &q__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, & - tau[1], &c__1, &a[a_offset], lda); - - } else { - i__1 = i__ + i__ * a_dim1; - i__2 = i__ + i__ * a_dim1; - r__1 = a[i__2].r; - a[i__1].r = r__1, a[i__1].i = 0.f; - } - i__1 = i__ + (i__ + 1) * a_dim1; - i__2 = i__; - a[i__1].r = e[i__2], a[i__1].i = 
0.f; - i__1 = i__ + 1; - i__2 = i__ + 1 + (i__ + 1) * a_dim1; - d__[i__1] = a[i__2].r; - i__1 = i__; - tau[i__1].r = taui.r, tau[i__1].i = taui.i; -/* L10: */ - } - i__1 = a_dim1 + 1; - d__[1] = a[i__1].r; - } else { - -/* Reduce the lower triangle of A */ - - i__1 = a_dim1 + 1; - i__2 = a_dim1 + 1; - r__1 = a[i__2].r; - a[i__1].r = r__1, a[i__1].i = 0.f; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* - Generate elementary reflector H(i) = I - tau * v * v' - to annihilate A(i+2:n,i) -*/ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, & - taui); - i__2 = i__; - e[i__2] = alpha.r; - - if ((taui.r != 0.f) || (taui.i != 0.f)) { - -/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ - - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute x := tau * A * v storing y in TAU(i:n-1) */ - - i__2 = *n - i__; - chemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b55, &tau[ - i__], &c__1); - -/* Compute w := x - 1/2 * tau * (x'*v) * v */ - - q__3.r = -.5f, q__3.i = -0.f; - q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * - taui.i + q__3.i * taui.r; - i__2 = *n - i__; - cdotc_(&q__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * - a_dim1], &c__1); - q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * - q__4.i + q__2.i * q__4.r; - alpha.r = q__1.r, alpha.i = q__1.i; - i__2 = *n - i__; - caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &c__1); - -/* - Apply the transformation as a rank-2 update: - A := A - v * w' - w * v' -*/ - - i__2 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cher2_(uplo, &i__2, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1, - &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], - lda); - - } else { - i__2 = i__ + 1 + (i__ + 1) * a_dim1; - i__3 = i__ + 1 + (i__ + 1) * a_dim1; - 
r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - } - i__2 = i__ + 1 + i__ * a_dim1; - i__3 = i__; - a[i__2].r = e[i__3], a[i__2].i = 0.f; - i__2 = i__; - i__3 = i__ + i__ * a_dim1; - d__[i__2] = a[i__3].r; - i__2 = i__; - tau[i__2].r = taui.r, tau[i__2].i = taui.i; -/* L20: */ - } - i__1 = *n; - i__2 = *n + *n * a_dim1; - d__[i__1] = a[i__2].r; - } - - return 0; - -/* End of CHETD2 */ - -} /* chetd2_ */ - -/* Subroutine */ int chetrd_(char *uplo, integer *n, complex *a, integer *lda, - real *d__, real *e, complex *tau, complex *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1; - - /* Local variables */ - static integer i__, j, nb, kk, nx, iws; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - static logical upper; - extern /* Subroutine */ int chetd2_(char *, integer *, complex *, integer - *, real *, real *, complex *, integer *), cher2k_(char *, - char *, integer *, integer *, complex *, complex *, integer *, - complex *, integer *, real *, complex *, integer *), clatrd_(char *, integer *, integer *, complex *, integer - *, real *, complex *, complex *, integer *), xerbla_(char - *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CHETRD reduces a complex Hermitian matrix A to real symmetric - tridiagonal form T by a unitary similarity transformation: - Q**H * A * Q = T. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. 
- - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit, if UPLO = 'U', the diagonal and first superdiagonal - of A are overwritten by the corresponding elements of the - tridiagonal matrix T, and the elements above the first - superdiagonal, with the array TAU, represent the unitary - matrix Q as a product of elementary reflectors; if UPLO - = 'L', the diagonal and first subdiagonal of A are over- - written by the corresponding elements of the tridiagonal - matrix T, and the elements below the first subdiagonal, with - the array TAU, represent the unitary matrix Q as a product - of elementary reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - D (output) REAL array, dimension (N) - The diagonal elements of the tridiagonal matrix T: - D(i) = A(i,i). - - E (output) REAL array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix T: - E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - - TAU (output) COMPLEX array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= 1. - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. 
- - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n-1) . . . H(2) H(1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in - A(1:i-1,i+1), and tau in TAU(i). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(n-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), - and tau in TAU(i). - - The contents of A on exit are illustrated by the following examples - with n = 5: - - if UPLO = 'U': if UPLO = 'L': - - ( d e v2 v3 v4 ) ( d ) - ( d e v3 v4 ) ( e d ) - ( d e v4 ) ( v1 e d ) - ( d e ) ( v1 v2 e d ) - ( d ) ( v1 v2 v3 e d ) - - where d and e denote diagonal and off-diagonal elements of T, and vi - denotes an element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tau; - --work; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - lquery = *lwork == -1; - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } else if (*lwork < 1 && ! lquery) { - *info = -9; - } - - if (*info == 0) { - -/* Determine the block size. 
*/ - - nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, - (ftnlen)1); - lwkopt = *n * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CHETRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nx = *n; - iws = 1; - if (nb > 1 && nb < *n) { - -/* - Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "CHETRD", uplo, n, &c_n1, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *n) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: determine the - minimum value of NB, and reduce NB or force use of - unblocked code by setting NX = N. - - Computing MAX -*/ - i__1 = *lwork / ldwork; - nb = max(i__1,1); - nbmin = ilaenv_(&c__2, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - if (nb < nbmin) { - nx = *n; - } - } - } else { - nx = *n; - } - } else { - nb = 1; - } - - if (upper) { - -/* - Reduce the upper triangle of A. - Columns 1:kk are handled by the unblocked method. -*/ - - kk = *n - (*n - nx + nb - 1) / nb * nb; - i__1 = kk + 1; - i__2 = -nb; - for (i__ = *n - nb + 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += - i__2) { - -/* - Reduce columns i:i+nb-1 to tridiagonal form and form the - matrix W which is needed to update the unreduced part of - the matrix -*/ - - i__3 = i__ + nb - 1; - clatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & - work[1], &ldwork); - -/* - Update the unreduced submatrix A(1:i-1,1:i-1), using an - update of the form: A := A - V*W' - W*V' -*/ - - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ * a_dim1 - + 1], lda, &work[1], &ldwork, &c_b1011, &a[a_offset], lda); - -/* - Copy superdiagonal elements back into A, and diagonal - elements into D -*/ - - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - i__4 = j - 1 + j * a_dim1; - i__5 = j - 1; - a[i__4].r = e[i__5], a[i__4].i = 0.f; - i__4 = j; - i__5 = j + j * a_dim1; - d__[i__4] = a[i__5].r; -/* L10: */ - } -/* L20: */ - } - -/* Use unblocked code to reduce the last or only block */ - - chetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); - } else { - -/* Reduce the lower triangle of A */ - - i__2 = *n - nx; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { - -/* - Reduce columns i:i+nb-1 to tridiagonal form and form the - matrix W which is needed to update the unreduced part of - the matrix -*/ - - i__3 = *n - i__ + 1; - clatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & - tau[i__], &work[1], &ldwork); - -/* - Update the unreduced submatrix A(i+nb:n,i+nb:n), using - an update of the form: A := A - V*W' - W*V' -*/ - - i__3 = *n - i__ - nb + 1; - q__1.r = -1.f, q__1.i = -0.f; - cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ + nb + - i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b1011, &a[ - i__ + nb + (i__ + nb) * a_dim1], lda); - -/* - Copy subdiagonal elements back into A, and diagonal - elements into D -*/ - - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - i__4 = j + 1 + j * a_dim1; - i__5 = j; - a[i__4].r = e[i__5], a[i__4].i = 0.f; - i__4 = j; - i__5 = j + j * a_dim1; - d__[i__4] = a[i__5].r; -/* L30: */ - } -/* L40: */ - } - -/* Use unblocked code to reduce the last or only block */ - - i__1 = *n - i__ + 1; - chetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], - &tau[i__], &iinfo); - } - - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CHETRD */ - -} /* chetrd_ */ - -/* Subroutine */ int chseqr_(char *job, char *compz, integer *n, integer *ilo, - integer *ihi, complex *h__, integer *ldh, complex *w, complex *z__, - integer *ldz, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4[2], - i__5, i__6; - real r__1, r__2, r__3, r__4; - complex q__1; - char ch__1[2]; - - /* Builtin functions */ - double r_imag(complex *); - void r_cnjg(complex *, complex *); - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__, j, k, l; - static complex s[225] /* was [15][15] */, v[16]; - static integer i1, i2, ii, nh, nr, ns, nv; - 
static complex vv[16]; - static integer itn; - static complex tau; - static integer its; - static real ulp, tst1; - static integer maxb, ierr; - static real unfl; - static complex temp; - static real ovfl; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *), ccopy_(integer *, complex *, integer *, - complex *, integer *); - static integer itemp; - static real rtemp; - static logical initz, wantt, wantz; - static real rwork[1]; - extern doublereal slapy2_(real *, real *); - extern /* Subroutine */ int slabad_(real *, real *), clarfg_(integer *, - complex *, complex *, integer *, complex *); - extern integer icamax_(integer *, complex *, integer *); - extern doublereal slamch_(char *), clanhs_(char *, integer *, - complex *, integer *, real *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *), clahqr_(logical *, logical *, integer *, integer *, integer *, - complex *, integer *, complex *, integer *, integer *, complex *, - integer *, integer *), clacpy_(char *, integer *, integer *, - complex *, integer *, complex *, integer *), claset_(char - *, integer *, integer *, complex *, complex *, complex *, integer - *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int clarfx_(char *, integer *, integer *, complex - *, complex *, complex *, integer *, complex *); - static real smlnum; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CHSEQR computes the eigenvalues of a complex upper Hessenberg - matrix H, and, optionally, the matrices T and Z from the Schur - decomposition H = Z T Z**H, where T is an upper triangular matrix - (the Schur form), and Z is the unitary matrix of Schur vectors. - - Optionally Z may be postmultiplied into an input unitary matrix Q, - so that this routine can give the Schur factorization of a matrix A - which has been reduced to the Hessenberg form H by the unitary - matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. - - Arguments - ========= - - JOB (input) CHARACTER*1 - = 'E': compute eigenvalues only; - = 'S': compute eigenvalues and the Schur form T. - - COMPZ (input) CHARACTER*1 - = 'N': no Schur vectors are computed; - = 'I': Z is initialized to the unit matrix and the matrix Z - of Schur vectors of H is returned; - = 'V': Z must contain an unitary matrix Q on entry, and - the product Q*Z is returned. - - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to CGEBAL, and then passed to CGEHRD - when the matrix output by CGEBAL is reduced to Hessenberg - form. Otherwise ILO and IHI should be set to 1 and N - respectively. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - H (input/output) COMPLEX array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if JOB = 'S', H contains the upper triangular matrix - T from the Schur decomposition (the Schur form). If - JOB = 'E', the contents of H are unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - W (output) COMPLEX array, dimension (N) - The computed eigenvalues. 
If JOB = 'S', the eigenvalues are - stored in the same order as on the diagonal of the Schur form - returned in H, with W(i) = H(i,i). - - Z (input/output) COMPLEX array, dimension (LDZ,N) - If COMPZ = 'N': Z is not referenced. - If COMPZ = 'I': on entry, Z need not be set, and on exit, Z - contains the unitary matrix Z of the Schur vectors of H. - If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, - which is assumed to be equal to the unit matrix except for - the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. - Normally Q is the unitary matrix generated by CUNGHR after - the call to CGEHRD which formed the Hessenberg matrix H. - - LDZ (input) INTEGER - The leading dimension of the array Z. - LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, CHSEQR failed to compute all the - eigenvalues in a total of 30*(IHI-ILO+1) iterations; - elements 1:ilo-1 and i+1:n of W contain those - eigenvalues which have been successfully computed. 
- - ===================================================================== - - - Decode and test the input parameters -*/ - - /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --w; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - - /* Function Body */ - wantt = lsame_(job, "S"); - initz = lsame_(compz, "I"); - wantz = (initz) || (lsame_(compz, "V")); - - *info = 0; - i__1 = max(1,*n); - work[1].r = (real) i__1, work[1].i = 0.f; - lquery = *lwork == -1; - if (! lsame_(job, "E") && ! wantt) { - *info = -1; - } else if (! lsame_(compz, "N") && ! wantz) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { - *info = -4; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { - *info = -5; - } else if (*ldh < max(1,*n)) { - *info = -7; - } else if ((*ldz < 1) || (wantz && *ldz < max(1,*n))) { - *info = -10; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CHSEQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Initialize Z, if necessary */ - - if (initz) { - claset_("Full", n, n, &c_b55, &c_b56, &z__[z_offset], ldz); - } - -/* Store the eigenvalues isolated by CGEBAL. */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__ + i__ * h_dim1; - w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; -/* L10: */ - } - i__1 = *n; - for (i__ = *ihi + 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__ + i__ * h_dim1; - w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; -/* L20: */ - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - i__1 = *ilo; - i__2 = *ilo + *ilo * h_dim1; - w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; - return 0; - } - -/* - Set rows and columns ILO to IHI to zero below the first - subdiagonal. 
-*/ - - i__1 = *ihi - 2; - for (j = *ilo; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j + 2; i__ <= i__2; ++i__) { - i__3 = i__ + j * h_dim1; - h__[i__3].r = 0.f, h__[i__3].i = 0.f; -/* L30: */ - } -/* L40: */ - } - nh = *ihi - *ilo + 1; - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are re-set inside the main loop. -*/ - - if (wantt) { - i1 = 1; - i2 = *n; - } else { - i1 = *ilo; - i2 = *ihi; - } - -/* Ensure that the subdiagonal elements are real. */ - - i__1 = *ihi; - for (i__ = *ilo + 1; i__ <= i__1; ++i__) { - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; - if (r_imag(&temp) != 0.f) { - r__1 = temp.r; - r__2 = r_imag(&temp); - rtemp = slapy2_(&r__1, &r__2); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.f; - q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; - temp.r = q__1.r, temp.i = q__1.i; - if (i2 > i__) { - i__2 = i2 - i__; - r_cnjg(&q__1, &temp); - cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); - } - i__2 = i__ - i1; - cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); - if (i__ < *ihi) { - i__2 = i__ + 1 + i__ * h_dim1; - i__3 = i__ + 1 + i__ * h_dim1; - q__1.r = temp.r * h__[i__3].r - temp.i * h__[i__3].i, q__1.i = - temp.r * h__[i__3].i + temp.i * h__[i__3].r; - h__[i__2].r = q__1.r, h__[i__2].i = q__1.i; - } - if (wantz) { - cscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); - } - } -/* L50: */ - } - -/* - Determine the order of the multi-shift QR algorithm to be used. 
- - Writing concatenation -*/ - i__4[0] = 1, a__1[0] = job; - i__4[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); - ns = ilaenv_(&c__4, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); -/* Writing concatenation */ - i__4[0] = 1, a__1[0] = job; - i__4[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); - maxb = ilaenv_(&c__8, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (((ns <= 1) || (ns > nh)) || (maxb >= nh)) { - -/* Use the standard double-shift algorithm */ - - clahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, - ihi, &z__[z_offset], ldz, info); - return 0; - } - maxb = max(2,maxb); -/* Computing MIN */ - i__1 = min(ns,maxb); - ns = min(i__1,15); - -/* - Now 1 < NS <= MAXB < NH. - - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - unfl = slamch_("Safe minimum"); - ovfl = 1.f / unfl; - slabad_(&unfl, &ovfl); - ulp = slamch_("Precision"); - smlnum = unfl * (nh / ulp); - -/* ITN is the total number of multiple-shift QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of at most MAXB. Each iteration of the loop - works with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO, or - H(L,L-1) is negligible so that the matrix splits. -*/ - - i__ = *ihi; -L60: - if (i__ < *ilo) { - goto L180; - } - -/* - Perform multiple-shift QR iterations on rows and columns ILO to I - until a submatrix of order at most MAXB splits off at the bottom - because a subdiagonal element has become negligible. -*/ - - l = *ilo; - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. 
*/ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - i__3 = k - 1 + (k - 1) * h_dim1; - i__5 = k + k * h_dim1; - tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[k - - 1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__5] - .r, dabs(r__3)) + (r__4 = r_imag(&h__[k + k * h_dim1]), - dabs(r__4))); - if (tst1 == 0.f) { - i__3 = i__ - l + 1; - tst1 = clanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); - } - i__3 = k + (k - 1) * h_dim1; -/* Computing MAX */ - r__2 = ulp * tst1; - if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) { - goto L80; - } -/* L70: */ - } -L80: - l = k; - if (l > *ilo) { - -/* H(L,L-1) is negligible. */ - - i__2 = l + (l - 1) * h_dim1; - h__[i__2].r = 0.f, h__[i__2].i = 0.f; - } - -/* Exit from loop if a submatrix of order <= MAXB has split off. */ - - if (l >= i__ - maxb + 1) { - goto L170; - } - -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! wantt) { - i1 = l; - i2 = i__; - } - - if ((its == 20) || (its == 30)) { - -/* Exceptional shifts. */ - - i__2 = i__; - for (ii = i__ - ns + 1; ii <= i__2; ++ii) { - i__3 = ii; - i__5 = ii + (ii - 1) * h_dim1; - i__6 = ii + ii * h_dim1; - r__3 = ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = h__[i__6] - .r, dabs(r__2))) * 1.5f; - w[i__3].r = r__3, w[i__3].i = 0.f; -/* L90: */ - } - } else { - -/* Use eigenvalues of trailing submatrix of order NS as shifts. */ - - clacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * - h_dim1], ldh, s, &c__15); - clahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &w[i__ - - ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr); - if (ierr > 0) { - -/* - If CLAHQR failed to compute all NS eigenvalues, use the - unconverged diagonal elements as the remaining shifts. 
-*/ - - i__2 = ierr; - for (ii = 1; ii <= i__2; ++ii) { - i__3 = i__ - ns + ii; - i__5 = ii + ii * 15 - 16; - w[i__3].r = s[i__5].r, w[i__3].i = s[i__5].i; -/* L100: */ - } - } - } - -/* - Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) - where G is the Hessenberg submatrix H(L:I,L:I) and w is - the vector of shifts (stored in W). The result is - stored in the local array V. -*/ - - v[0].r = 1.f, v[0].i = 0.f; - i__2 = ns + 1; - for (ii = 2; ii <= i__2; ++ii) { - i__3 = ii - 1; - v[i__3].r = 0.f, v[i__3].i = 0.f; -/* L110: */ - } - nv = 1; - i__2 = i__; - for (j = i__ - ns + 1; j <= i__2; ++j) { - i__3 = nv + 1; - ccopy_(&i__3, v, &c__1, vv, &c__1); - i__3 = nv + 1; - i__5 = j; - q__1.r = -w[i__5].r, q__1.i = -w[i__5].i; - cgemv_("No transpose", &i__3, &nv, &c_b56, &h__[l + l * h_dim1], - ldh, vv, &c__1, &q__1, v, &c__1); - ++nv; - -/* - Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, - reset it to the unit vector. -*/ - - itemp = icamax_(&nv, v, &c__1); - i__3 = itemp - 1; - rtemp = (r__1 = v[i__3].r, dabs(r__1)) + (r__2 = r_imag(&v[itemp - - 1]), dabs(r__2)); - if (rtemp == 0.f) { - v[0].r = 1.f, v[0].i = 0.f; - i__3 = nv; - for (ii = 2; ii <= i__3; ++ii) { - i__5 = ii - 1; - v[i__5].r = 0.f, v[i__5].i = 0.f; -/* L120: */ - } - } else { - rtemp = dmax(rtemp,smlnum); - r__1 = 1.f / rtemp; - csscal_(&nv, &r__1, v, &c__1); - } -/* L130: */ - } - -/* Multiple-shift QR step */ - - i__2 = i__ - 1; - for (k = l; k <= i__2; ++k) { - -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. 
- - Computing MIN -*/ - i__3 = ns + 1, i__5 = i__ - k + 1; - nr = min(i__3,i__5); - if (k > l) { - ccopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - clarfg_(&nr, v, &v[1], &c__1, &tau); - if (k > l) { - i__3 = k + (k - 1) * h_dim1; - h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; - i__3 = i__; - for (ii = k + 1; ii <= i__3; ++ii) { - i__5 = ii + (k - 1) * h_dim1; - h__[i__5].r = 0.f, h__[i__5].i = 0.f; -/* L140: */ - } - } - v[0].r = 1.f, v[0].i = 0.f; - -/* - Apply G' from the left to transform the rows of the matrix - in columns K to I2. -*/ - - i__3 = i2 - k + 1; - r_cnjg(&q__1, &tau); - clarfx_("Left", &nr, &i__3, v, &q__1, &h__[k + k * h_dim1], ldh, & - work[1]); - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+NR,I). - - Computing MIN -*/ - i__5 = k + nr; - i__3 = min(i__5,i__) - i1 + 1; - clarfx_("Right", &i__3, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, - &work[1]); - - if (wantz) { - -/* Accumulate transformations in the matrix Z */ - - clarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], - ldz, &work[1]); - } -/* L150: */ - } - -/* Ensure that H(I,I-1) is real. */ - - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; - if (r_imag(&temp) != 0.f) { - r__1 = temp.r; - r__2 = r_imag(&temp); - rtemp = slapy2_(&r__1, &r__2); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.f; - q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; - temp.r = q__1.r, temp.i = q__1.i; - if (i2 > i__) { - i__2 = i2 - i__; - r_cnjg(&q__1, &temp); - cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); - } - i__2 = i__ - i1; - cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); - if (wantz) { - cscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); - } - } - -/* L160: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L170: - -/* - A submatrix of order <= MAXB in rows and columns L to I has split - off. 
Use the double-shift QR algorithm to handle it. -*/ - - clahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &w[1], ilo, ihi, - &z__[z_offset], ldz, info); - if (*info > 0) { - return 0; - } - -/* - Decrement number of remaining iterations, and return to start of - the main loop with a new value of I. -*/ - - itn -= its; - i__ = l - 1; - goto L60; - -L180: - i__1 = max(1,*n); - work[1].r = (real) i__1, work[1].i = 0.f; - return 0; - -/* End of CHSEQR */ - -} /* chseqr_ */ - -/* Subroutine */ int clabrd_(integer *m, integer *n, integer *nb, complex *a, - integer *lda, real *d__, real *e, complex *tauq, complex *taup, - complex *x, integer *ldx, complex *y, integer *ldy) -{ - /* System generated locals */ - integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, - i__3; - complex q__1; - - /* Local variables */ - static integer i__; - static complex alpha; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), cgemv_(char *, integer *, integer *, complex *, - complex *, integer *, complex *, integer *, complex *, complex *, - integer *), clarfg_(integer *, complex *, complex *, - integer *, complex *), clacgv_(integer *, complex *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLABRD reduces the first NB rows and columns of a complex general - m by n matrix A to upper or lower real bidiagonal form by a unitary - transformation Q' * A * P, and returns the matrices X and Y which - are needed to apply the transformation to the unreduced part of A. - - If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower - bidiagonal form. - - This is an auxiliary routine called by CGEBRD - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. 
- - N (input) INTEGER - The number of columns in the matrix A. - - NB (input) INTEGER - The number of leading rows and columns of A to be reduced. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. - On exit, the first NB rows and columns of the matrix are - overwritten; the rest of the array is unchanged. - If m >= n, elements on and below the diagonal in the first NB - columns, with the array TAUQ, represent the unitary - matrix Q as a product of elementary reflectors; and - elements above the diagonal in the first NB rows, with the - array TAUP, represent the unitary matrix P as a product - of elementary reflectors. - If m < n, elements below the diagonal in the first NB - columns, with the array TAUQ, represent the unitary - matrix Q as a product of elementary reflectors, and - elements on and above the diagonal in the first NB rows, - with the array TAUP, represent the unitary matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) REAL array, dimension (NB) - The diagonal elements of the first NB rows and columns of - the reduced matrix. D(i) = A(i,i). - - E (output) REAL array, dimension (NB) - The off-diagonal elements of the first NB rows and columns of - the reduced matrix. - - TAUQ (output) COMPLEX array dimension (NB) - The scalar factors of the elementary reflectors which - represent the unitary matrix Q. See Further Details. - - TAUP (output) COMPLEX array, dimension (NB) - The scalar factors of the elementary reflectors which - represent the unitary matrix P. See Further Details. - - X (output) COMPLEX array, dimension (LDX,NB) - The m-by-nb matrix X required to update the unreduced part - of A. - - LDX (input) INTEGER - The leading dimension of the array X. LDX >= max(1,M). 
- - Y (output) COMPLEX array, dimension (LDY,NB) - The n-by-nb matrix Y required to update the unreduced part - of A. - - LDY (output) INTEGER - The leading dimension of the array Y. LDY >= max(1,N). - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are complex scalars, and v and u are complex - vectors. - - If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in - A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in - A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - The elements of the vectors v and u together form the m-by-nb matrix - V and the nb-by-n matrix U' which are needed, with X and Y, to apply - the transformation to the unreduced part of the matrix, using a block - update of the form: A := A - V*Y' - X*U'. - - The contents of A on exit are illustrated by the following examples - with nb = 2: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) - ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) - ( v1 v2 a a a ) ( v1 1 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) - - where a denotes an element of the original matrix which is unchanged, - vi denotes an element of the vector defining H(i), and ui an element - of the vector defining G(i). 
- - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - x_dim1 = *ldx; - x_offset = 1 + x_dim1; - x -= x_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; - - /* Function Body */ - if ((*m <= 0) || (*n <= 0)) { - return 0; - } - - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i:m,i) */ - - i__2 = i__ - 1; - clacgv_(&i__2, &y[i__ + y_dim1], ldy); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda, - &y[i__ + y_dim1], ldy, &c_b56, &a[i__ + i__ * a_dim1], & - c__1); - i__2 = i__ - 1; - clacgv_(&i__2, &y[i__ + y_dim1], ldy); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + x_dim1], ldx, - &a[i__ * a_dim1 + 1], &c__1, &c_b56, &a[i__ + i__ * - a_dim1], &c__1); - -/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ - - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & - tauq[i__]); - i__2 = i__; - d__[i__2] = alpha.r; - if (i__ < *n) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute Y(i+1:n,i) */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + ( - i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], & - c__1, &c_b55, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + - a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b55, & - y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - q__1.r = -1.f, 
q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b56, &y[ - i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &x[i__ + - x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b55, & - y[i__ * y_dim1 + 1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + - 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *n - i__; - cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - -/* Update A(i,i+1:n) */ - - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - clacgv_(&i__, &a[i__ + a_dim1], lda); - i__2 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__, &q__1, &y[i__ + 1 + - y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b56, &a[i__ + - (i__ + 1) * a_dim1], lda); - clacgv_(&i__, &a[i__ + a_dim1], lda); - i__2 = i__ - 1; - clacgv_(&i__2, &x[i__ + x_dim1], ldx); - i__2 = i__ - 1; - i__3 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + - 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b56, - &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ - 1; - clacgv_(&i__2, &x[i__ + x_dim1], ldx); - -/* Generate reflection P(i) to annihilate A(i,i+2:n) */ - - i__2 = i__ + (i__ + 1) * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & - taup[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + (i__ + 1) * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute X(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = *n - i__; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ + 1 + ( - i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], - lda, &c_b55, &x[i__ + 1 + i__ * 
x_dim1], &c__1); - i__2 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__, &c_b56, &y[i__ + 1 - + y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b55, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__, &q__1, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b55, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - i__; - cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Reduce to lower bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i,i:n) */ - - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ - 1; - clacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + y_dim1], ldy, - &a[i__ + a_dim1], lda, &c_b56, &a[i__ + i__ * a_dim1], - lda); - i__2 = i__ - 1; - clacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = i__ - 1; - clacgv_(&i__2, &x[i__ + x_dim1], ldx); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[i__ * - a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b56, &a[i__ + - i__ * a_dim1], lda); - i__2 = i__ - 1; - clacgv_(&i__2, &x[i__ + x_dim1], ldx); - -/* Generate reflection P(i) to annihilate A(i,i+1:n) */ - - i__2 = i__ + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - 
i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & - taup[i__]); - i__2 = i__; - d__[i__2] = alpha.r; - if (i__ < *m) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute X(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = *n - i__ + 1; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ + 1 + i__ - * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b55, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &y[i__ + - y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b55, &x[ - i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ * a_dim1 - + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b55, &x[ - i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - i__; - cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - -/* Update A(i+1:m,i) */ - - i__2 = i__ - 1; - clacgv_(&i__2, &y[i__ + y_dim1], ldy); - i__2 = *m - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + - a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b56, &a[i__ + - 1 + i__ * a_dim1], &c__1); - i__2 = i__ - 1; - clacgv_(&i__2, &y[i__ + y_dim1], ldy); - i__2 = *m - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__, &q__1, &x[i__ + 1 + - x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b56, &a[ - i__ + 1 + i__ * a_dim1], 
&c__1); - -/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, - &tauq[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute Y(i+1:n,i) */ - - i__2 = *m - i__; - i__3 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + - 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * - a_dim1], &c__1, &c_b55, &y[i__ + 1 + i__ * y_dim1], & - c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + - 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b56, &y[ - i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__; - cgemv_("Conjugate transpose", &i__2, &i__, &c_b56, &x[i__ + 1 - + x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("Conjugate transpose", &i__, &i__2, &q__1, &a[(i__ + 1) - * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *n - i__; - cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - } else { - i__2 = *n - i__ + 1; - clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); - } -/* L20: */ - } - } - return 0; - -/* End of CLABRD */ - -} /* clabrd_ */ - -/* Subroutine */ int clacgv_(integer *n, complex *x, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2; - complex q__1; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, ioff; - - -/* - -- LAPACK auxiliary routine (version 3.0) 
-- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLACGV conjugates a complex vector of length N. - - Arguments - ========= - - N (input) INTEGER - The length of the vector X. N >= 0. - - X (input/output) COMPLEX array, dimension - (1+(N-1)*abs(INCX)) - On entry, the vector of length N to be conjugated. - On exit, X is overwritten with conjg(X). - - INCX (input) INTEGER - The spacing between successive elements of X. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*incx == 1) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - r_cnjg(&q__1, &x[i__]); - x[i__2].r = q__1.r, x[i__2].i = q__1.i; -/* L10: */ - } - } else { - ioff = 1; - if (*incx < 0) { - ioff = 1 - (*n - 1) * *incx; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ioff; - r_cnjg(&q__1, &x[ioff]); - x[i__2].r = q__1.r, x[i__2].i = q__1.i; - ioff += *incx; -/* L20: */ - } - } - return 0; - -/* End of CLACGV */ - -} /* clacgv_ */ - -/* Subroutine */ int clacp2_(char *uplo, integer *m, integer *n, real *a, - integer *lda, complex *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CLACP2 copies all or part of a real two-dimensional matrix A to a - complex matrix B. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be copied to B. 
- = 'U': Upper triangular part - = 'L': Lower triangular part - Otherwise: All of the matrix A - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input) REAL array, dimension (LDA,N) - The m by n matrix A. If UPLO = 'U', only the upper trapezium - is accessed; if UPLO = 'L', only the lower trapezium is - accessed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - B (output) COMPLEX array, dimension (LDB,N) - On exit, B = A in the locations specified by UPLO. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,M). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4], b[i__3].i = 0.f; -/* L10: */ - } -/* L20: */ - } - - } else if (lsame_(uplo, "L")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4], b[i__3].i = 0.f; -/* L30: */ - } -/* L40: */ - } - - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4], b[i__3].i = 0.f; -/* L50: */ - } -/* L60: */ - } - } - - return 0; - -/* End of CLACP2 */ - -} /* clacp2_ */ - -/* Subroutine */ int clacpy_(char *uplo, integer *m, integer *n, complex *a, - integer *lda, complex *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static 
integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - CLACPY copies all or part of a two-dimensional matrix A to another - matrix B. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be copied to B. - = 'U': Upper triangular part - = 'L': Lower triangular part - Otherwise: All of the matrix A - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input) COMPLEX array, dimension (LDA,N) - The m by n matrix A. If UPLO = 'U', only the upper trapezium - is accessed; if UPLO = 'L', only the lower trapezium is - accessed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - B (output) COMPLEX array, dimension (LDB,N) - On exit, B = A in the locations specified by UPLO. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,M). 
- - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; -/* L10: */ - } -/* L20: */ - } - - } else if (lsame_(uplo, "L")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; -/* L30: */ - } -/* L40: */ - } - - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - i__4 = i__ + j * a_dim1; - b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; -/* L50: */ - } -/* L60: */ - } - } - - return 0; - -/* End of CLACPY */ - -} /* clacpy_ */ - -/* Subroutine */ int clacrm_(integer *m, integer *n, complex *a, integer *lda, - real *b, integer *ldb, complex *c__, integer *ldc, real *rwork) -{ - /* System generated locals */ - integer b_dim1, b_offset, a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, - i__3, i__4, i__5; - real r__1; - complex q__1; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLACRM performs a very simple matrix-matrix multiplication: - C := A * B, - where A is M by N and complex; B is N by N and real; - C is M by N and complex. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A and of the matrix C. - M >= 0. - - N (input) INTEGER - The number of columns and rows of the matrix B and - the number of columns of the matrix C. - N >= 0. - - A (input) COMPLEX array, dimension (LDA, N) - A contains the M by N matrix A. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >=max(1,M). - - B (input) REAL array, dimension (LDB, N) - B contains the N by N matrix B. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >=max(1,N). - - C (input) COMPLEX array, dimension (LDC, N) - C contains the M by N matrix C. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >=max(1,N). - - RWORK (workspace) REAL array, dimension (2*M*N) - - ===================================================================== - - - Quick return if possible. 
-*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --rwork; - - /* Function Body */ - if ((*m == 0) || (*n == 0)) { - return 0; - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - rwork[(j - 1) * *m + i__] = a[i__3].r; -/* L10: */ - } -/* L20: */ - } - - l = *m * *n + 1; - sgemm_("N", "N", m, n, n, &c_b1011, &rwork[1], m, &b[b_offset], ldb, & - c_b320, &rwork[l], m); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = l + (j - 1) * *m + i__ - 1; - c__[i__3].r = rwork[i__4], c__[i__3].i = 0.f; -/* L30: */ - } -/* L40: */ - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - rwork[(j - 1) * *m + i__] = r_imag(&a[i__ + j * a_dim1]); -/* L50: */ - } -/* L60: */ - } - sgemm_("N", "N", m, n, n, &c_b1011, &rwork[1], m, &b[b_offset], ldb, & - c_b320, &rwork[l], m); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - r__1 = c__[i__4].r; - i__5 = l + (j - 1) * *m + i__ - 1; - q__1.r = r__1, q__1.i = rwork[i__5]; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L70: */ - } -/* L80: */ - } - - return 0; - -/* End of CLACRM */ - -} /* clacrm_ */ - -/* Complex */ VOID cladiv_(complex * ret_val, complex *x, complex *y) -{ - /* System generated locals */ - real r__1, r__2, r__3, r__4; - complex q__1; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static real zi, zr; - extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real * - , real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLADIV := X / Y, where X and Y are complex. The computation of X / Y - will not overflow on an intermediary step unless the results - overflows. - - Arguments - ========= - - X (input) COMPLEX - Y (input) COMPLEX - The complex scalars X and Y. - - ===================================================================== -*/ - - - r__1 = x->r; - r__2 = r_imag(x); - r__3 = y->r; - r__4 = r_imag(y); - sladiv_(&r__1, &r__2, &r__3, &r__4, &zr, &zi); - q__1.r = zr, q__1.i = zi; - ret_val->r = q__1.r, ret_val->i = q__1.i; - - return ; - -/* End of CLADIV */ - -} /* cladiv_ */ - -/* Subroutine */ int claed0_(integer *qsiz, integer *n, real *d__, real *e, - complex *q, integer *ldq, complex *qstore, integer *ldqs, real *rwork, - integer *iwork, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; - real r__1; - - /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, k, ll, iq, lgn, msd2, smm1, spm1, spm2; - static real temp; - static integer curr, iperm; - extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, - complex *, integer *); - static integer indxq, iwrem; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - static integer iqptr; - extern /* Subroutine */ int claed7_(integer *, integer *, integer *, - integer *, integer *, integer *, real *, complex *, integer *, - real *, integer *, real *, integer *, integer *, integer *, - integer *, integer *, real *, complex *, real *, integer *, - integer *); - static integer tlvls; - extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, - integer *, real *, integer *, complex *, integer *, real *); - static integer igivcl; - extern /* Subroutine */ int xerbla_(char *, 
integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer igivnm, submat, curprb, subpbs, igivpt, curlvl, matsiz, - iprmpt, smlsiz; - extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, - real *, integer *, real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - Using the divide and conquer method, CLAED0 computes all eigenvalues - of a symmetric tridiagonal matrix which is one diagonal block of - those from reducing a dense or band Hermitian matrix and - corresponding eigenvectors of the dense or band matrix. - - Arguments - ========= - - QSIZ (input) INTEGER - The dimension of the unitary matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, the eigenvalues in ascending order. - - E (input/output) REAL array, dimension (N-1) - On entry, the off-diagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. - - Q (input/output) COMPLEX array, dimension (LDQ,N) - On entry, Q must contain an QSIZ x N matrix whose columns - unitarily orthonormal. It is a part of the unitary matrix - that reduces the full dense Hermitian matrix to a - (reducible) symmetric tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). 
- - IWORK (workspace) INTEGER array, - the dimension of IWORK must be at least - 6 + 6*N + 5*N*lg N - ( lg( N ) = smallest integer k - such that 2^k >= N ) - - RWORK (workspace) REAL array, - dimension (1 + 3*N + 2*N*lg N + 3*N**2) - ( lg( N ) = smallest integer k - such that 2^k >= N ) - - QSTORE (workspace) COMPLEX array, dimension (LDQS, N) - Used to store parts of - the eigenvector matrix when the updating matrix multiplies - take place. - - LDQS (input) INTEGER - The leading dimension of the array QSTORE. - LDQS >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - ===================================================================== - - Warning: N could be as big as QSIZ! - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - qstore_dim1 = *ldqs; - qstore_offset = 1 + qstore_dim1; - qstore -= qstore_offset; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - -/* - IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN - INFO = -1 - ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) ) - $ THEN -*/ - if (*qsiz < max(0,*n)) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldq < max(1,*n)) { - *info = -6; - } else if (*ldqs < max(1,*n)) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLAED0", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - smlsiz = ilaenv_(&c__9, "CLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - Determine the size and placement of the submatrices, and save in - the leading elements of IWORK. 
-*/ - - iwork[1] = *n; - subpbs = 1; - tlvls = 0; -L10: - if (iwork[subpbs] > smlsiz) { - for (j = subpbs; j >= 1; --j) { - iwork[j * 2] = (iwork[j] + 1) / 2; - iwork[((j) << (1)) - 1] = iwork[j] / 2; -/* L20: */ - } - ++tlvls; - subpbs <<= 1; - goto L10; - } - i__1 = subpbs; - for (j = 2; j <= i__1; ++j) { - iwork[j] += iwork[j - 1]; -/* L30: */ - } - -/* - Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 - using rank-1 modifications (cuts). -*/ - - spm1 = subpbs - 1; - i__1 = spm1; - for (i__ = 1; i__ <= i__1; ++i__) { - submat = iwork[i__] + 1; - smm1 = submat - 1; - d__[smm1] -= (r__1 = e[smm1], dabs(r__1)); - d__[submat] -= (r__1 = e[smm1], dabs(r__1)); -/* L40: */ - } - - indxq = ((*n) << (2)) + 3; - -/* - Set up workspaces for eigenvalues only/accumulate new vectors - routine -*/ - - temp = log((real) (*n)) / log(2.f); - lgn = (integer) temp; - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - iprmpt = indxq + *n + 1; - iperm = iprmpt + *n * lgn; - iqptr = iperm + *n * lgn; - igivpt = iqptr + *n + 2; - igivcl = igivpt + *n * lgn; - - igivnm = 1; - iq = igivnm + ((*n) << (1)) * lgn; -/* Computing 2nd power */ - i__1 = *n; - iwrem = iq + i__1 * i__1 + 1; -/* Initialize pointers */ - i__1 = subpbs; - for (i__ = 0; i__ <= i__1; ++i__) { - iwork[iprmpt + i__] = 1; - iwork[igivpt + i__] = 1; -/* L50: */ - } - iwork[iqptr] = 1; - -/* - Solve each submatrix eigenproblem at the bottom of the divide and - conquer tree. 
-*/ - - curr = 0; - i__1 = spm1; - for (i__ = 0; i__ <= i__1; ++i__) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[1]; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 1] - iwork[i__]; - } - ll = iq - 1 + iwork[iqptr + curr]; - ssteqr_("I", &matsiz, &d__[submat], &e[submat], &rwork[ll], &matsiz, & - rwork[1], info); - clacrm_(qsiz, &matsiz, &q[submat * q_dim1 + 1], ldq, &rwork[ll], & - matsiz, &qstore[submat * qstore_dim1 + 1], ldqs, &rwork[iwrem] - ); -/* Computing 2nd power */ - i__2 = matsiz; - iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; - ++curr; - if (*info > 0) { - *info = submat * (*n + 1) + submat + matsiz - 1; - return 0; - } - k = 1; - i__2 = iwork[i__ + 1]; - for (j = submat; j <= i__2; ++j) { - iwork[indxq + j] = k; - ++k; -/* L60: */ - } -/* L70: */ - } - -/* - Successively merge eigensystems of adjacent submatrices - into eigensystem for the corresponding larger matrix. - - while ( SUBPBS > 1 ) -*/ - - curlvl = 1; -L80: - if (subpbs > 1) { - spm2 = subpbs - 2; - i__1 = spm2; - for (i__ = 0; i__ <= i__1; i__ += 2) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[2]; - msd2 = iwork[1]; - curprb = 0; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 2] - iwork[i__]; - msd2 = matsiz / 2; - ++curprb; - } - -/* - Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) - into an eigensystem of size MATSIZ. CLAED7 handles the case - when the eigenvectors of a full or band Hermitian matrix (which - was reduced to tridiagonal form) are desired. - - I am free to use Q as a valuable working space until Loop 150. 
-*/ - - claed7_(&matsiz, &msd2, qsiz, &tlvls, &curlvl, &curprb, &d__[ - submat], &qstore[submat * qstore_dim1 + 1], ldqs, &e[ - submat + msd2 - 1], &iwork[indxq + submat], &rwork[iq], & - iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[ - igivpt], &iwork[igivcl], &rwork[igivnm], &q[submat * - q_dim1 + 1], &rwork[iwrem], &iwork[subpbs + 1], info); - if (*info > 0) { - *info = submat * (*n + 1) + submat + matsiz - 1; - return 0; - } - iwork[i__ / 2 + 1] = iwork[i__ + 2]; -/* L90: */ - } - subpbs /= 2; - ++curlvl; - goto L80; - } - -/* - end while - - Re-merge the eigenvalues/vectors which were deflated at the final - merge step. -*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - rwork[i__] = d__[j]; - ccopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1] - , &c__1); -/* L100: */ - } - scopy_(n, &rwork[1], &c__1, &d__[1], &c__1); - - return 0; - -/* End of CLAED0 */ - -} /* claed0_ */ - -/* Subroutine */ int claed7_(integer *n, integer *cutpnt, integer *qsiz, - integer *tlvls, integer *curlvl, integer *curpbm, real *d__, complex * - q, integer *ldq, real *rho, integer *indxq, real *qstore, integer * - qptr, integer *prmptr, integer *perm, integer *givptr, integer * - givcol, real *givnum, complex *work, real *rwork, integer *iwork, - integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, k, n1, n2, iq, iw, iz, ptr, ind1, ind2, indx, curr, - indxc, indxp; - extern /* Subroutine */ int claed8_(integer *, integer *, integer *, - complex *, integer *, real *, real *, integer *, real *, real *, - complex *, integer *, real *, integer *, integer *, integer *, - integer *, integer *, integer *, real *, integer *), slaed9_( - integer *, integer *, integer *, integer *, real *, real *, - integer *, real *, real *, real *, real *, integer *, integer *), - slaeda_(integer *, 
integer *, integer *, integer *, integer *, - integer *, integer *, integer *, real *, real *, integer *, real * - , real *, integer *); - static integer idlmda; - extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, - integer *, real *, integer *, complex *, integer *, real *), - xerbla_(char *, integer *), slamrg_(integer *, integer *, - real *, integer *, integer *, integer *); - static integer coltyp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLAED7 computes the updated eigensystem of a diagonal - matrix after modification by a rank-one symmetric matrix. This - routine is used only for the eigenproblem which requires all - eigenvalues and optionally eigenvectors of a dense or banded - Hermitian matrix that has been reduced to tridiagonal form. - - T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) - - where Z = Q'u, u is a vector of length N with ones in the - CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. - - The eigenvectors of the original matrix are stored in Q, and the - eigenvalues are in D. The algorithm consists of three stages: - - The first stage consists of deflating the size of the problem - when there are multiple eigenvalues or if there is a zero in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine SLAED2. - - The second stage consists of calculating the updated - eigenvalues. This is done by finding the roots of the secular - equation via the routine SLAED4 (as called by SLAED3). - This routine also calculates the eigenvectors of the current - problem. - - The final stage consists of computing the updated eigenvectors - directly using the updated eigenvalues. 
The eigenvectors for - the current problem are multiplied with the eigenvectors from - the overall problem. - - Arguments - ========= - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - CUTPNT (input) INTEGER - Contains the location of the last eigenvalue in the leading - sub-matrix. min(1,N) <= CUTPNT <= N. - - QSIZ (input) INTEGER - The dimension of the unitary matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N. - - TLVLS (input) INTEGER - The total number of merging levels in the overall divide and - conquer tree. - - CURLVL (input) INTEGER - The current level in the overall merge routine, - 0 <= curlvl <= tlvls. - - CURPBM (input) INTEGER - The current problem in the current level in the overall - merge routine (counting from upper left to lower right). - - D (input/output) REAL array, dimension (N) - On entry, the eigenvalues of the rank-1-perturbed matrix. - On exit, the eigenvalues of the repaired matrix. - - Q (input/output) COMPLEX array, dimension (LDQ,N) - On entry, the eigenvectors of the rank-1-perturbed matrix. - On exit, the eigenvectors of the repaired tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - RHO (input) REAL - Contains the subdiagonal element used to create the rank-1 - modification. - - INDXQ (output) INTEGER array, dimension (N) - This contains the permutation which will reintegrate the - subproblem just solved back into sorted order, - ie. D( INDXQ( I = 1, N ) ) will be in ascending order. - - IWORK (workspace) INTEGER array, dimension (4*N) - - RWORK (workspace) REAL array, - dimension (3*N+2*QSIZ*N) - - WORK (workspace) COMPLEX array, dimension (QSIZ*N) - - QSTORE (input/output) REAL array, dimension (N**2+1) - Stores eigenvectors of submatrices encountered during - divide and conquer, packed together. QPTR points to - beginning of the submatrices. 
- - QPTR (input/output) INTEGER array, dimension (N+2) - List of indices pointing to beginning of submatrices stored - in QSTORE. The submatrices are numbered starting at the - bottom left of the divide and conquer tree, from left to - right and bottom to top. - - PRMPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in PERM a - level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) - indicates the size of the permutation and also the size of - the full, non-deflated problem. - - PERM (input) INTEGER array, dimension (N lg N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in GIVCOL a - level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) - indicates the number of Givens rotations. - - GIVCOL (input) INTEGER array, dimension (2, N lg N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (input) REAL array, dimension (2, N lg N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --qstore; - --qptr; - --prmptr; - --perm; - --givptr; - givcol -= 3; - givnum -= 3; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - -/* - IF( ICOMPQ.LT.0 .OR. 
ICOMPQ.GT.1 ) THEN - INFO = -1 - ELSE IF( N.LT.0 ) THEN -*/ - if (*n < 0) { - *info = -1; - } else if ((min(1,*n) > *cutpnt) || (*n < *cutpnt)) { - *info = -2; - } else if (*qsiz < *n) { - *info = -3; - } else if (*ldq < max(1,*n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLAED7", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in SLAED2 and SLAED3. -*/ - - iz = 1; - idlmda = iz + *n; - iw = idlmda + *n; - iq = iw + *n; - - indx = 1; - indxc = indx + *n; - coltyp = indxc + *n; - indxp = coltyp + *n; - -/* - Form the z-vector which consists of the last row of Q_1 and the - first row of Q_2. -*/ - - ptr = pow_ii(&c__2, tlvls) + 1; - i__1 = *curlvl - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *tlvls - i__; - ptr += pow_ii(&c__2, &i__2); -/* L10: */ - } - curr = ptr + *curpbm; - slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & - givcol[3], &givnum[3], &qstore[1], &qptr[1], &rwork[iz], &rwork[ - iz + *n], info); - -/* - When solving the final problem, we no longer need the stored data, - so we will overwrite the data from this level onto the previously - used storage space. -*/ - - if (*curlvl == *tlvls) { - qptr[curr] = 1; - prmptr[curr] = 1; - givptr[curr] = 1; - } - -/* Sort and Deflate eigenvalues. */ - - claed8_(&k, n, qsiz, &q[q_offset], ldq, &d__[1], rho, cutpnt, &rwork[iz], - &rwork[idlmda], &work[1], qsiz, &rwork[iw], &iwork[indxp], &iwork[ - indx], &indxq[1], &perm[prmptr[curr]], &givptr[curr + 1], &givcol[ - ((givptr[curr]) << (1)) + 1], &givnum[((givptr[curr]) << (1)) + 1] - , info); - prmptr[curr + 1] = prmptr[curr] + *n; - givptr[curr + 1] += givptr[curr]; - -/* Solve Secular Equation. 
*/ - - if (k != 0) { - slaed9_(&k, &c__1, &k, n, &d__[1], &rwork[iq], &k, rho, &rwork[idlmda] - , &rwork[iw], &qstore[qptr[curr]], &k, info); - clacrm_(qsiz, &k, &work[1], qsiz, &qstore[qptr[curr]], &k, &q[ - q_offset], ldq, &rwork[iq]); -/* Computing 2nd power */ - i__1 = k; - qptr[curr + 1] = qptr[curr] + i__1 * i__1; - if (*info != 0) { - return 0; - } - -/* Prepare the INDXQ sorting premutation. */ - - n1 = k; - n2 = *n - k; - ind1 = 1; - ind2 = *n; - slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); - } else { - qptr[curr + 1] = qptr[curr]; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indxq[i__] = i__; -/* L20: */ - } - } - - return 0; - -/* End of CLAED7 */ - -} /* claed7_ */ - -/* Subroutine */ int claed8_(integer *k, integer *n, integer *qsiz, complex * - q, integer *ldq, real *d__, real *rho, integer *cutpnt, real *z__, - real *dlamda, complex *q2, integer *ldq2, real *w, integer *indxp, - integer *indx, integer *indxq, integer *perm, integer *givptr, - integer *givcol, real *givnum, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; - real r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real c__; - static integer i__, j; - static real s, t; - static integer k2, n1, n2, jp, n1p1; - static real eps, tau, tol; - static integer jlam, imax, jmax; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - ccopy_(integer *, complex *, integer *, complex *, integer *), - csrot_(integer *, complex *, integer *, complex *, integer *, - real *, real *), scopy_(integer *, real *, integer *, real *, - integer *); - extern doublereal slapy2_(real *, real *), slamch_(char *); - extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex - *, integer *, complex *, integer *), xerbla_(char *, - integer *); - extern integer isamax_(integer *, real *, integer *); - extern /* Subroutine */ int slamrg_(integer *, integer *, real *, 
integer - *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 - - - Purpose - ======= - - CLAED8 merges the two sets of eigenvalues together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - eigenvalues are close together or if there is a tiny element in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - Arguments - ========= - - K (output) INTEGER - Contains the number of non-deflated eigenvalues. - This is the order of the related secular equation. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - QSIZ (input) INTEGER - The dimension of the unitary matrix used to reduce - the dense or band matrix to tridiagonal form. - QSIZ >= N if ICOMPQ = 1. - - Q (input/output) COMPLEX array, dimension (LDQ,N) - On entry, Q contains the eigenvectors of the partially solved - system which has been previously updated in matrix - multiplies with other partially solved eigensystems. - On exit, Q contains the trailing (N-K) updated eigenvectors - (those which were deflated) in its last N-K columns. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max( 1, N ). - - D (input/output) REAL array, dimension (N) - On entry, D contains the eigenvalues of the two submatrices to - be combined. On exit, D contains the trailing (N-K) updated - eigenvalues (those which were deflated) sorted into increasing - order. - - RHO (input/output) REAL - Contains the off diagonal element associated with the rank-1 - cut which originally split the two submatrices which are now - being recombined. RHO is modified during the computation to - the value required by SLAED3. 
- - CUTPNT (input) INTEGER - Contains the location of the last eigenvalue in the leading - sub-matrix. MIN(1,N) <= CUTPNT <= N. - - Z (input) REAL array, dimension (N) - On input this vector contains the updating vector (the last - row of the first sub-eigenvector matrix and the first row of - the second sub-eigenvector matrix). The contents of Z are - destroyed during the updating process. - - DLAMDA (output) REAL array, dimension (N) - Contains a copy of the first K eigenvalues which will be used - by SLAED3 to form the secular equation. - - Q2 (output) COMPLEX array, dimension (LDQ2,N) - If ICOMPQ = 0, Q2 is not referenced. Otherwise, - Contains a copy of the first K eigenvectors which will be used - by SLAED7 in a matrix multiply (SGEMM) to update the new - eigenvectors. - - LDQ2 (input) INTEGER - The leading dimension of the array Q2. LDQ2 >= max( 1, N ). - - W (output) REAL array, dimension (N) - This will hold the first k values of the final - deflation-altered z-vector and will be passed to SLAED3. - - INDXP (workspace) INTEGER array, dimension (N) - This will contain the permutation used to place deflated - values of D at the end of the array. On output INDXP(1:K) - points to the nondeflated D-values and INDXP(K+1:N) - points to the deflated eigenvalues. - - INDX (workspace) INTEGER array, dimension (N) - This will contain the permutation used to sort the contents of - D into ascending order. - - INDXQ (input) INTEGER array, dimension (N) - This contains the permutation which separately sorts the two - sub-problems in D into ascending order. Note that elements in - the second half of this permutation must first have CUTPNT - added to their values in order to be accurate. - - PERM (output) INTEGER array, dimension (N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (output) INTEGER - Contains the number of Givens rotations which took place in - this subproblem. 
- - GIVCOL (output) INTEGER array, dimension (2, N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (output) REAL array, dimension (2, N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --d__; - --z__; - --dlamda; - q2_dim1 = *ldq2; - q2_offset = 1 + q2_dim1; - q2 -= q2_offset; - --w; - --indxp; - --indx; - --indxq; - --perm; - givcol -= 3; - givnum -= 3; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -2; - } else if (*qsiz < *n) { - *info = -3; - } else if (*ldq < max(1,*n)) { - *info = -5; - } else if ((*cutpnt < min(1,*n)) || (*cutpnt > *n)) { - *info = -8; - } else if (*ldq2 < max(1,*n)) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLAED8", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - n1 = *cutpnt; - n2 = *n - n1; - n1p1 = n1 + 1; - - if (*rho < 0.f) { - sscal_(&n2, &c_b1290, &z__[n1p1], &c__1); - } - -/* Normalize z so that norm(z) = 1 */ - - t = 1.f / sqrt(2.f); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - indx[j] = j; -/* L10: */ - } - sscal_(n, &t, &z__[1], &c__1); - *rho = (r__1 = *rho * 2.f, dabs(r__1)); - -/* Sort the eigenvalues into increasing order */ - - i__1 = *n; - for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { - indxq[i__] += *cutpnt; -/* L20: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = d__[indxq[i__]]; - w[i__] = z__[indxq[i__]]; -/* L30: */ - } - i__ = 1; - j = *cutpnt + 1; - slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = dlamda[indx[i__]]; - z__[i__] 
= w[indx[i__]]; -/* L40: */ - } - -/* Calculate the allowable deflation tolerance */ - - imax = isamax_(n, &z__[1], &c__1); - jmax = isamax_(n, &d__[1], &c__1); - eps = slamch_("Epsilon"); - tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1)); - -/* - If the rank-1 modifier is small enough, no more needs to be done - -- except to reorganize Q so that its columns correspond with the - elements in D. -*/ - - if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { - *k = 0; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - perm[j] = indxq[indx[j]]; - ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] - , &c__1); -/* L50: */ - } - clacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); - return 0; - } - -/* - If there are multiple eigenvalues then the problem deflates. Here - the number of equal eigenvalues are found. As each equal - eigenvalue is found, an elementary reflector is computed to rotate - the corresponding eigensubspace so that the corresponding - components of Z are zero in this new basis. -*/ - - *k = 0; - *givptr = 0; - k2 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - indxp[k2] = j; - if (j == *n) { - goto L100; - } - } else { - jlam = j; - goto L70; - } -/* L60: */ - } -L70: - ++j; - if (j > *n) { - goto L90; - } - if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - indxp[k2] = j; - } else { - -/* Check if eigenvalues are close enough to allow deflation. */ - - s = z__[jlam]; - c__ = z__[j]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = slapy2_(&c__, &s); - t = d__[j] - d__[jlam]; - c__ /= tau; - s = -s / tau; - if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { - -/* Deflation is possible. 
*/ - - z__[j] = tau; - z__[jlam] = 0.f; - -/* Record the appropriate Givens rotation */ - - ++(*givptr); - givcol[((*givptr) << (1)) + 1] = indxq[indx[jlam]]; - givcol[((*givptr) << (1)) + 2] = indxq[indx[j]]; - givnum[((*givptr) << (1)) + 1] = c__; - givnum[((*givptr) << (1)) + 2] = s; - csrot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[ - indx[j]] * q_dim1 + 1], &c__1, &c__, &s); - t = d__[jlam] * c__ * c__ + d__[j] * s * s; - d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; - d__[jlam] = t; - --k2; - i__ = 1; -L80: - if (k2 + i__ <= *n) { - if (d__[jlam] < d__[indxp[k2 + i__]]) { - indxp[k2 + i__ - 1] = indxp[k2 + i__]; - indxp[k2 + i__] = jlam; - ++i__; - goto L80; - } else { - indxp[k2 + i__ - 1] = jlam; - } - } else { - indxp[k2 + i__ - 1] = jlam; - } - jlam = j; - } else { - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - jlam = j; - } - } - goto L70; -L90: - -/* Record the last eigenvalue. */ - - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - -L100: - -/* - Sort the eigenvalues and corresponding eigenvectors into DLAMDA - and Q2 respectively. The eigenvalues/vectors which were not - deflated go into the first K slots of DLAMDA and Q2 respectively, - while those which were deflated go into the last N - K slots. -*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - jp = indxp[j]; - dlamda[j] = d__[jp]; - perm[j] = indxq[indx[jp]]; - ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], & - c__1); -/* L110: */ - } - -/* - The deflated eigenvalues and their corresponding vectors go back - into the last N - K slots of D and Q respectively. 
-*/ - - if (*k < *n) { - i__1 = *n - *k; - scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); - i__1 = *n - *k; - clacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k + - 1) * q_dim1 + 1], ldq); - } - - return 0; - -/* End of CLAED8 */ - -} /* claed8_ */ - -/* Subroutine */ int clahqr_(logical *wantt, logical *wantz, integer *n, - integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, - integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * - info) -{ - /* System generated locals */ - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; - real r__1, r__2, r__3, r__4, r__5, r__6; - complex q__1, q__2, q__3, q__4; - - /* Builtin functions */ - double r_imag(complex *); - void c_sqrt(complex *, complex *), r_cnjg(complex *, complex *); - double c_abs(complex *); - - /* Local variables */ - static integer i__, j, k, l, m; - static real s; - static complex t, u, v[2], x, y; - static integer i1, i2; - static complex t1; - static real t2; - static complex v2; - static real h10; - static complex h11; - static real h21; - static complex h22; - static integer nh, nz; - static complex h11s; - static integer itn, its; - static real ulp; - static complex sum; - static real tst1; - static complex temp; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), ccopy_(integer *, complex *, integer *, complex *, - integer *); - static real rtemp, rwork[1]; - extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, - integer *, complex *); - extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); - extern doublereal slamch_(char *), clanhs_(char *, integer *, - complex *, integer *, real *); - static real smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CLAHQR is an auxiliary routine called by CHSEQR to update the - eigenvalues and Schur decomposition already computed by CHSEQR, by - dealing with the Hessenberg submatrix in rows and columns ILO to IHI. - - Arguments - ========= - - WANTT (input) LOGICAL - = .TRUE. : the full Schur form T is required; - = .FALSE.: only eigenvalues are required. - - WANTZ (input) LOGICAL - = .TRUE. : the matrix of Schur vectors Z is required; - = .FALSE.: Schur vectors are not required. - - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows and - columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). - CLAHQR works primarily with the Hessenberg submatrix in rows - and columns ILO to IHI, but applies transformations to all of - H if WANTT is .TRUE.. - 1 <= ILO <= max(1,IHI); IHI <= N. - - H (input/output) COMPLEX array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if WANTT is .TRUE., H is upper triangular in rows - and columns ILO:IHI, with any 2-by-2 diagonal blocks in - standard form. If WANTT is .FALSE., the contents of H are - unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - W (output) COMPLEX array, dimension (N) - The computed eigenvalues ILO to IHI are stored in the - corresponding elements of W. If WANTT is .TRUE., the - eigenvalues are stored in the same order as on the diagonal - of the Schur form returned in H, with W(i) = H(i,i). - - ILOZ (input) INTEGER - IHIZ (input) INTEGER - Specify the rows of Z to which transformations must be - applied if WANTZ is .TRUE.. - 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. 
- - Z (input/output) COMPLEX array, dimension (LDZ,N) - If WANTZ is .TRUE., on entry Z must contain the current - matrix Z of transformations accumulated by CHSEQR, and on - exit Z has been updated; transformations are applied only to - the submatrix Z(ILOZ:IHIZ,ILO:IHI). - If WANTZ is .FALSE., Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = i, CLAHQR failed to compute all the - eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1) - iterations; elements i+1:ihi of W contain those - eigenvalues which have been successfully computed. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --w; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - - /* Function Body */ - *info = 0; - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - i__1 = *ilo; - i__2 = *ilo + *ilo * h_dim1; - w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; - return 0; - } - - nh = *ihi - *ilo + 1; - nz = *ihiz - *iloz + 1; - -/* - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - ulp = slamch_("Precision"); - smlnum = slamch_("Safe minimum") / ulp; - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. -*/ - - if (*wantt) { - i1 = 1; - i2 = *n; - } - -/* ITN is the total number of QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of 1. Each iteration of the loop works - with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. 
Either L = ILO, or - H(L,L-1) is negligible so that the matrix splits. -*/ - - i__ = *ihi; -L10: - if (i__ < *ilo) { - goto L130; - } - -/* - Perform QR iterations on rows and columns ILO to I until a - submatrix of order 1 splits off at the bottom because a - subdiagonal element has become negligible. -*/ - - l = *ilo; - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. */ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - i__3 = k - 1 + (k - 1) * h_dim1; - i__4 = k + k * h_dim1; - tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[k - - 1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__4] - .r, dabs(r__3)) + (r__4 = r_imag(&h__[k + k * h_dim1]), - dabs(r__4))); - if (tst1 == 0.f) { - i__3 = i__ - l + 1; - tst1 = clanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); - } - i__3 = k + (k - 1) * h_dim1; -/* Computing MAX */ - r__2 = ulp * tst1; - if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) { - goto L30; - } -/* L20: */ - } -L30: - l = k; - if (l > *ilo) { - -/* H(L,L-1) is negligible */ - - i__2 = l + (l - 1) * h_dim1; - h__[i__2].r = 0.f, h__[i__2].i = 0.f; - } - -/* Exit from loop if a submatrix of order 1 has split off. */ - - if (l >= i__) { - goto L120; - } - -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! (*wantt)) { - i1 = l; - i2 = i__; - } - - if ((its == 10) || (its == 20)) { - -/* Exceptional shift. */ - - i__2 = i__ + (i__ - 1) * h_dim1; - s = (r__1 = h__[i__2].r, dabs(r__1)) * .75f; - i__2 = i__ + i__ * h_dim1; - q__1.r = s + h__[i__2].r, q__1.i = h__[i__2].i; - t.r = q__1.r, t.i = q__1.i; - } else { - -/* Wilkinson's shift. 
*/ - - i__2 = i__ + i__ * h_dim1; - t.r = h__[i__2].r, t.i = h__[i__2].i; - i__2 = i__ - 1 + i__ * h_dim1; - i__3 = i__ + (i__ - 1) * h_dim1; - r__1 = h__[i__3].r; - q__1.r = r__1 * h__[i__2].r, q__1.i = r__1 * h__[i__2].i; - u.r = q__1.r, u.i = q__1.i; - if ((u.r != 0.f) || (u.i != 0.f)) { - i__2 = i__ - 1 + (i__ - 1) * h_dim1; - q__2.r = h__[i__2].r - t.r, q__2.i = h__[i__2].i - t.i; - q__1.r = q__2.r * .5f, q__1.i = q__2.i * .5f; - x.r = q__1.r, x.i = q__1.i; - q__3.r = x.r * x.r - x.i * x.i, q__3.i = x.r * x.i + x.i * - x.r; - q__2.r = q__3.r + u.r, q__2.i = q__3.i + u.i; - c_sqrt(&q__1, &q__2); - y.r = q__1.r, y.i = q__1.i; - if (x.r * y.r + r_imag(&x) * r_imag(&y) < 0.f) { - q__1.r = -y.r, q__1.i = -y.i; - y.r = q__1.r, y.i = q__1.i; - } - q__3.r = x.r + y.r, q__3.i = x.i + y.i; - cladiv_(&q__2, &u, &q__3); - q__1.r = t.r - q__2.r, q__1.i = t.i - q__2.i; - t.r = q__1.r, t.i = q__1.i; - } - } - -/* Look for two consecutive small subdiagonal elements. */ - - i__2 = l + 1; - for (m = i__ - 1; m >= i__2; --m) { - -/* - Determine the effect of starting the single-shift QR - iteration at row M, and see if this would make H(M,M-1) - negligible. 
-*/ - - i__3 = m + m * h_dim1; - h11.r = h__[i__3].r, h11.i = h__[i__3].i; - i__3 = m + 1 + (m + 1) * h_dim1; - h22.r = h__[i__3].r, h22.i = h__[i__3].i; - q__1.r = h11.r - t.r, q__1.i = h11.i - t.i; - h11s.r = q__1.r, h11s.i = q__1.i; - i__3 = m + 1 + m * h_dim1; - h21 = h__[i__3].r; - s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs( - r__2)) + dabs(h21); - q__1.r = h11s.r / s, q__1.i = h11s.i / s; - h11s.r = q__1.r, h11s.i = q__1.i; - h21 /= s; - v[0].r = h11s.r, v[0].i = h11s.i; - v[1].r = h21, v[1].i = 0.f; - i__3 = m + (m - 1) * h_dim1; - h10 = h__[i__3].r; - tst1 = ((r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs( - r__2))) * ((r__3 = h11.r, dabs(r__3)) + (r__4 = r_imag(& - h11), dabs(r__4)) + ((r__5 = h22.r, dabs(r__5)) + (r__6 = - r_imag(&h22), dabs(r__6)))); - if ((r__1 = h10 * h21, dabs(r__1)) <= ulp * tst1) { - goto L50; - } -/* L40: */ - } - i__2 = l + l * h_dim1; - h11.r = h__[i__2].r, h11.i = h__[i__2].i; - i__2 = l + 1 + (l + 1) * h_dim1; - h22.r = h__[i__2].r, h22.i = h__[i__2].i; - q__1.r = h11.r - t.r, q__1.i = h11.i - t.i; - h11s.r = q__1.r, h11s.i = q__1.i; - i__2 = l + 1 + l * h_dim1; - h21 = h__[i__2].r; - s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs(r__2)) - + dabs(h21); - q__1.r = h11s.r / s, q__1.i = h11s.i / s; - h11s.r = q__1.r, h11s.i = q__1.i; - h21 /= s; - v[0].r = h11s.r, v[0].i = h11s.i; - v[1].r = h21, v[1].i = 0.f; -L50: - -/* Single-shift QR step */ - - i__2 = i__ - 1; - for (k = m; k <= i__2; ++k) { - -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. - - V(2) is always real before the call to CLARFG, and hence - after the call T2 ( = T1*V(2) ) is also real. 
-*/ - - if (k > m) { - ccopy_(&c__2, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - clarfg_(&c__2, v, &v[1], &c__1, &t1); - if (k > m) { - i__3 = k + (k - 1) * h_dim1; - h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; - i__3 = k + 1 + (k - 1) * h_dim1; - h__[i__3].r = 0.f, h__[i__3].i = 0.f; - } - v2.r = v[1].r, v2.i = v[1].i; - q__1.r = t1.r * v2.r - t1.i * v2.i, q__1.i = t1.r * v2.i + t1.i * - v2.r; - t2 = q__1.r; - -/* - Apply G from the left to transform the rows of the matrix - in columns K to I2. -*/ - - i__3 = i2; - for (j = k; j <= i__3; ++j) { - r_cnjg(&q__3, &t1); - i__4 = k + j * h_dim1; - q__2.r = q__3.r * h__[i__4].r - q__3.i * h__[i__4].i, q__2.i = - q__3.r * h__[i__4].i + q__3.i * h__[i__4].r; - i__5 = k + 1 + j * h_dim1; - q__4.r = t2 * h__[i__5].r, q__4.i = t2 * h__[i__5].i; - q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; - sum.r = q__1.r, sum.i = q__1.i; - i__4 = k + j * h_dim1; - i__5 = k + j * h_dim1; - q__1.r = h__[i__5].r - sum.r, q__1.i = h__[i__5].i - sum.i; - h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; - i__4 = k + 1 + j * h_dim1; - i__5 = k + 1 + j * h_dim1; - q__2.r = sum.r * v2.r - sum.i * v2.i, q__2.i = sum.r * v2.i + - sum.i * v2.r; - q__1.r = h__[i__5].r - q__2.r, q__1.i = h__[i__5].i - q__2.i; - h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; -/* L60: */ - } - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+2,I). 
- - Computing MIN -*/ - i__4 = k + 2; - i__3 = min(i__4,i__); - for (j = i1; j <= i__3; ++j) { - i__4 = j + k * h_dim1; - q__2.r = t1.r * h__[i__4].r - t1.i * h__[i__4].i, q__2.i = - t1.r * h__[i__4].i + t1.i * h__[i__4].r; - i__5 = j + (k + 1) * h_dim1; - q__3.r = t2 * h__[i__5].r, q__3.i = t2 * h__[i__5].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - sum.r = q__1.r, sum.i = q__1.i; - i__4 = j + k * h_dim1; - i__5 = j + k * h_dim1; - q__1.r = h__[i__5].r - sum.r, q__1.i = h__[i__5].i - sum.i; - h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; - i__4 = j + (k + 1) * h_dim1; - i__5 = j + (k + 1) * h_dim1; - r_cnjg(&q__3, &v2); - q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r * - q__3.i + sum.i * q__3.r; - q__1.r = h__[i__5].r - q__2.r, q__1.i = h__[i__5].i - q__2.i; - h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; -/* L70: */ - } - - if (*wantz) { - -/* Accumulate transformations in the matrix Z */ - - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - i__4 = j + k * z_dim1; - q__2.r = t1.r * z__[i__4].r - t1.i * z__[i__4].i, q__2.i = - t1.r * z__[i__4].i + t1.i * z__[i__4].r; - i__5 = j + (k + 1) * z_dim1; - q__3.r = t2 * z__[i__5].r, q__3.i = t2 * z__[i__5].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - sum.r = q__1.r, sum.i = q__1.i; - i__4 = j + k * z_dim1; - i__5 = j + k * z_dim1; - q__1.r = z__[i__5].r - sum.r, q__1.i = z__[i__5].i - - sum.i; - z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; - i__4 = j + (k + 1) * z_dim1; - i__5 = j + (k + 1) * z_dim1; - r_cnjg(&q__3, &v2); - q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r * - q__3.i + sum.i * q__3.r; - q__1.r = z__[i__5].r - q__2.r, q__1.i = z__[i__5].i - - q__2.i; - z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; -/* L80: */ - } - } - - if (k == m && m > l) { - -/* - If the QR step was started at row M > L because two - consecutive small subdiagonals were found, then extra - scaling must be performed to ensure that H(M,M-1) remains - real. 
-*/ - - q__1.r = 1.f - t1.r, q__1.i = 0.f - t1.i; - temp.r = q__1.r, temp.i = q__1.i; - r__1 = c_abs(&temp); - q__1.r = temp.r / r__1, q__1.i = temp.i / r__1; - temp.r = q__1.r, temp.i = q__1.i; - i__3 = m + 1 + m * h_dim1; - i__4 = m + 1 + m * h_dim1; - r_cnjg(&q__2, &temp); - q__1.r = h__[i__4].r * q__2.r - h__[i__4].i * q__2.i, q__1.i = - h__[i__4].r * q__2.i + h__[i__4].i * q__2.r; - h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; - if (m + 2 <= i__) { - i__3 = m + 2 + (m + 1) * h_dim1; - i__4 = m + 2 + (m + 1) * h_dim1; - q__1.r = h__[i__4].r * temp.r - h__[i__4].i * temp.i, - q__1.i = h__[i__4].r * temp.i + h__[i__4].i * - temp.r; - h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; - } - i__3 = i__; - for (j = m; j <= i__3; ++j) { - if (j != m + 1) { - if (i2 > j) { - i__4 = i2 - j; - cscal_(&i__4, &temp, &h__[j + (j + 1) * h_dim1], - ldh); - } - i__4 = j - i1; - r_cnjg(&q__1, &temp); - cscal_(&i__4, &q__1, &h__[i1 + j * h_dim1], &c__1); - if (*wantz) { - r_cnjg(&q__1, &temp); - cscal_(&nz, &q__1, &z__[*iloz + j * z_dim1], & - c__1); - } - } -/* L90: */ - } - } -/* L100: */ - } - -/* Ensure that H(I,I-1) is real. */ - - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; - if (r_imag(&temp) != 0.f) { - rtemp = c_abs(&temp); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.f; - q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; - temp.r = q__1.r, temp.i = q__1.i; - if (i2 > i__) { - i__2 = i2 - i__; - r_cnjg(&q__1, &temp); - cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); - } - i__2 = i__ - i1; - cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); - if (*wantz) { - cscal_(&nz, &temp, &z__[*iloz + i__ * z_dim1], &c__1); - } - } - -/* L110: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L120: - -/* H(I,I-1) is negligible: one eigenvalue has converged. 
*/ - - i__1 = i__; - i__2 = i__ + i__ * h_dim1; - w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; - -/* - Decrement number of remaining iterations, and return to start of - the main loop with new value of I. -*/ - - itn -= its; - i__ = l - 1; - goto L10; - -L130: - return 0; - -/* End of CLAHQR */ - -} /* clahqr_ */ - -/* Subroutine */ int clahrd_(integer *n, integer *k, integer *nb, complex *a, - integer *lda, complex *tau, complex *t, integer *ldt, complex *y, - integer *ldy) -{ - /* System generated locals */ - integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, - i__3; - complex q__1; - - /* Local variables */ - static integer i__; - static complex ei; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), cgemv_(char *, integer *, integer *, complex *, - complex *, integer *, complex *, integer *, complex *, complex *, - integer *), ccopy_(integer *, complex *, integer *, - complex *, integer *), caxpy_(integer *, complex *, complex *, - integer *, complex *, integer *), ctrmv_(char *, char *, char *, - integer *, complex *, integer *, complex *, integer *), clarfg_(integer *, complex *, complex *, integer - *, complex *), clacgv_(integer *, complex *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) - matrix A so that elements below the k-th subdiagonal are zero. The - reduction is performed by a unitary similarity transformation - Q' * A * Q. The routine returns the matrices V and T which determine - Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. - - This is an auxiliary routine called by CGEHRD. - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. - - K (input) INTEGER - The offset for the reduction. 
Elements below the k-th - subdiagonal in the first NB columns are reduced to zero. - - NB (input) INTEGER - The number of columns to be reduced. - - A (input/output) COMPLEX array, dimension (LDA,N-K+1) - On entry, the n-by-(n-k+1) general matrix A. - On exit, the elements on and above the k-th subdiagonal in - the first NB columns are overwritten with the corresponding - elements of the reduced matrix; the elements below the k-th - subdiagonal, with the array TAU, represent the matrix Q as a - product of elementary reflectors. The other columns of A are - unchanged. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) COMPLEX array, dimension (NB) - The scalar factors of the elementary reflectors. See Further - Details. - - T (output) COMPLEX array, dimension (LDT,NB) - The upper triangular matrix T. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= NB. - - Y (output) COMPLEX array, dimension (LDY,NB) - The n-by-nb matrix Y. - - LDY (input) INTEGER - The leading dimension of the array Y. LDY >= max(1,N). - - Further Details - =============== - - The matrix Q is represented as a product of nb elementary reflectors - - Q = H(1) H(2) . . . H(nb). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in - A(i+k+1:n,i), and tau in TAU(i). - - The elements of the vectors v together form the (n-k+1)-by-nb matrix - V which is needed, with T and Y, to apply the transformation to the - unreduced part of the matrix, using an update of the form: - A := (I - V*T*V') * (A - Y*V'). 
- - The contents of A on exit are illustrated by the following example - with n = 7, k = 3 and nb = 2: - - ( a h a a a ) - ( a h a a a ) - ( a h a a a ) - ( h h a a a ) - ( v1 h a a a ) - ( v1 v2 a a a ) - ( v1 v2 a a a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - --tau; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; - - /* Function Body */ - if (*n <= 1) { - return 0; - } - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ > 1) { - -/* - Update A(1:n,i) - - Compute i-th column of A - Y * V' -*/ - - i__2 = i__ - 1; - clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); - i__2 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &a[*k - + i__ - 1 + a_dim1], lda, &c_b56, &a[i__ * a_dim1 + 1], & - c__1); - i__2 = i__ - 1; - clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); - -/* - Apply I - V * T' * V' to this column (call it b) from the - left, using the last column of T as workspace - - Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) - ( V2 ) ( b2 ) - - where V1 is unit lower triangular - - w := V1' * b1 -*/ - - i__2 = i__ - 1; - ccopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + - 1], &c__1); - i__2 = i__ - 1; - ctrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a[*k + 1 + - a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); - -/* w := w + V2'*b2 */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[*k + i__ + - a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b56, - &t[*nb * t_dim1 + 1], &c__1); - -/* w := T'*w */ - - i__2 = i__ - 1; - 
ctrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); - -/* b2 := b2 - V2*w */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[*k + i__ + a_dim1], - lda, &t[*nb * t_dim1 + 1], &c__1, &c_b56, &a[*k + i__ + - i__ * a_dim1], &c__1); - -/* b1 := b1 - V1*w */ - - i__2 = i__ - 1; - ctrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] - , lda, &t[*nb * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - caxpy_(&i__2, &q__1, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ - * a_dim1], &c__1); - - i__2 = *k + i__ - 1 + (i__ - 1) * a_dim1; - a[i__2].r = ei.r, a[i__2].i = ei.i; - } - -/* - Generate the elementary reflector H(i) to annihilate - A(k+i+1:n,i) -*/ - - i__2 = *k + i__ + i__ * a_dim1; - ei.r = a[i__2].r, ei.i = a[i__2].i; - i__2 = *n - *k - i__ + 1; -/* Computing MIN */ - i__3 = *k + i__ + 1; - clarfg_(&i__2, &ei, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[i__]) - ; - i__2 = *k + i__ + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute Y(1:n,i) */ - - i__2 = *n - *k - i__ + 1; - cgemv_("No transpose", n, &i__2, &c_b56, &a[(i__ + 1) * a_dim1 + 1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b55, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[*k + i__ + - a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b55, &t[ - i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &t[i__ * - t_dim1 + 1], &c__1, &c_b56, &y[i__ * y_dim1 + 1], &c__1); - cscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); - -/* Compute T(1:i,i) */ - - i__2 = i__ - 1; - i__3 = i__; - q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; - cscal_(&i__2, &q__1, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - ctrmv_("Upper", "No transpose", "Non-unit", 
&i__2, &t[t_offset], ldt, - &t[i__ * t_dim1 + 1], &c__1) - ; - i__2 = i__ + i__ * t_dim1; - i__3 = i__; - t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; - -/* L10: */ - } - i__1 = *k + *nb + *nb * a_dim1; - a[i__1].r = ei.r, a[i__1].i = ei.i; - - return 0; - -/* End of CLAHRD */ - -} /* clahrd_ */ - -/* Subroutine */ int clals0_(integer *icompq, integer *nl, integer *nr, - integer *sqre, integer *nrhs, complex *b, integer *ldb, complex *bx, - integer *ldbx, integer *perm, integer *givptr, integer *givcol, - integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * - difl, real *difr, real *z__, integer *k, real *c__, real *s, real * - rwork, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, difr_dim1, difr_offset, givnum_dim1, - givnum_offset, poles_dim1, poles_offset, b_dim1, b_offset, - bx_dim1, bx_offset, i__1, i__2, i__3, i__4, i__5; - real r__1; - complex q__1; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer i__, j, m, n; - static real dj; - static integer nlp1, jcol; - static real temp; - static integer jrow; - extern doublereal snrm2_(integer *, real *, integer *); - static real diflj, difrj, dsigj; - extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, - complex *, integer *), sgemv_(char *, integer *, integer *, real * - , real *, integer *, real *, integer *, real *, real *, integer *), csrot_(integer *, complex *, integer *, complex *, - integer *, real *, real *); - extern doublereal slamc3_(real *, real *); - extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, complex *, integer *, integer *), csscal_(integer *, real *, complex *, integer *), - clacpy_(char *, integer *, integer *, complex *, integer *, - complex *, integer *), xerbla_(char *, integer *); - static real dsigjp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 - - - Purpose - ======= - - CLALS0 applies back the multiplying factors of either the left or the - right singular vector matrix of a diagonal matrix appended by a row - to the right hand side matrix B in solving the least squares problem - using the divide-and-conquer SVD approach. - - For the left singular vector matrix, three types of orthogonal - matrices are involved: - - (1L) Givens rotations: the number of such rotations is GIVPTR; the - pairs of columns/rows they were applied to are stored in GIVCOL; - and the C- and S-values of these rotations are stored in GIVNUM. - - (2L) Permutation. The (NL+1)-st row of B is to be moved to the first - row, and for J=2:N, PERM(J)-th row of B is to be moved to the - J-th row. - - (3L) The left singular vector matrix of the remaining matrix. - - For the right singular vector matrix, four types of orthogonal - matrices are involved: - - (1R) The right singular vector matrix of the remaining matrix. - - (2R) If SQRE = 1, one extra Givens rotation to generate the right - null space. - - (3R) The inverse transformation of (2L). - - (4R) The inverse transformation of (1L). - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed in - factored form: - = 0: Left singular vector matrix. - = 1: Right singular vector matrix. - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has row dimension N = NL + NR + 1, - and column dimension M = N + SQRE. - - NRHS (input) INTEGER - The number of columns of B and BX. NRHS must be at least 1. 
- - B (input/output) COMPLEX array, dimension ( LDB, NRHS ) - On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. - - LDB (input) INTEGER - The leading dimension of B. LDB must be at least - max(1,MAX( M, N ) ). - - BX (workspace) COMPLEX array, dimension ( LDBX, NRHS ) - - LDBX (input) INTEGER - The leading dimension of BX. - - PERM (input) INTEGER array, dimension ( N ) - The permutations (from deflation and sorting) applied - to the two blocks. - - GIVPTR (input) INTEGER - The number of Givens rotations which took place in this - subproblem. - - GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) - Each pair of numbers indicates a pair of rows/columns - involved in a Givens rotation. - - LDGCOL (input) INTEGER - The leading dimension of GIVCOL, must be at least N. - - GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) - Each number indicates the C or S value used in the - corresponding Givens rotation. - - LDGNUM (input) INTEGER - The leading dimension of arrays DIFR, POLES and - GIVNUM, must be at least K. - - POLES (input) REAL array, dimension ( LDGNUM, 2 ) - On entry, POLES(1:K, 1) contains the new singular - values obtained from solving the secular equation, and - POLES(1:K, 2) is an array containing the poles in the secular - equation. - - DIFL (input) REAL array, dimension ( K ). - On entry, DIFL(I) is the distance between I-th updated - (undeflated) singular value and the I-th (undeflated) old - singular value. - - DIFR (input) REAL array, dimension ( LDGNUM, 2 ). - On entry, DIFR(I, 1) contains the distances between I-th - updated (undeflated) singular value and the I+1-th - (undeflated) old singular value. And DIFR(I, 2) is the - normalizing factor for the I-th right singular vector. - - Z (input) REAL array, dimension ( K ) - Contain the components of the deflation-adjusted updating row - vector. 
- - K (input) INTEGER - Contains the dimension of the non-deflated matrix, - This is the order of the related secular equation. 1 <= K <=N. - - C (input) REAL - C contains garbage if SQRE =0 and the C-value of a Givens - rotation related to the right null space if SQRE = 1. - - S (input) REAL - S contains garbage if SQRE =0 and the S-value of a Givens - rotation related to the right null space if SQRE = 1. - - RWORK (workspace) REAL array, dimension - ( K*(1+NRHS) + 2*NRHS ) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - bx_dim1 = *ldbx; - bx_offset = 1 + bx_dim1; - bx -= bx_offset; - --perm; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - difr_dim1 = *ldgnum; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - poles_dim1 = *ldgnum; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - givnum_dim1 = *ldgnum; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - --difl; - --z__; - --rwork; - - /* Function Body */ - *info = 0; - - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*nl < 1) { - *info = -2; - } else if (*nr < 1) { - *info = -3; - } else if ((*sqre < 0) || (*sqre > 1)) { - *info = -4; - } - - n = *nl + *nr + 1; - - if (*nrhs < 1) { - *info = -5; - } else if (*ldb < n) { - *info = -7; - } else if (*ldbx < n) { - *info = -9; - } else if (*givptr < 0) { - *info = -11; - } else if (*ldgcol < n) { - *info = -13; - } else if (*ldgnum < n) { - *info = -15; - } else if (*k < 1) { - *info = -20; - } - if (*info != 0) { - i__1 = 
-(*info); - xerbla_("CLALS0", &i__1); - return 0; - } - - m = n + *sqre; - nlp1 = *nl + 1; - - if (*icompq == 0) { - -/* - Apply back orthogonal transformations from the left. - - Step (1L): apply back the Givens rotations performed. -*/ - - i__1 = *givptr; - for (i__ = 1; i__ <= i__1; ++i__) { - csrot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1], - ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[ - i__ + ((givnum_dim1) << (1))], &givnum[i__ + givnum_dim1]) - ; -/* L10: */ - } - -/* Step (2L): permute rows of B. */ - - ccopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - ccopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], - ldbx); -/* L20: */ - } - -/* - Step (3L): apply the inverse of the left singular vector - matrix to BX. -*/ - - if (*k == 1) { - ccopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); - if (z__[1] < 0.f) { - csscal_(nrhs, &c_b1290, &b[b_offset], ldb); - } - } else { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - diflj = difl[j]; - dj = poles[j + poles_dim1]; - dsigj = -poles[j + ((poles_dim1) << (1))]; - if (j < *k) { - difrj = -difr[j + difr_dim1]; - dsigjp = -poles[j + 1 + ((poles_dim1) << (1))]; - } - if ((z__[j] == 0.f) || (poles[j + ((poles_dim1) << (1))] == - 0.f)) { - rwork[j] = 0.f; - } else { - rwork[j] = -poles[j + ((poles_dim1) << (1))] * z__[j] / - diflj / (poles[j + ((poles_dim1) << (1))] + dj); - } - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - if ((z__[i__] == 0.f) || (poles[i__ + ((poles_dim1) << (1) - )] == 0.f)) { - rwork[i__] = 0.f; - } else { - rwork[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[ - i__] / (slamc3_(&poles[i__ + ((poles_dim1) << - (1))], &dsigj) - diflj) / (poles[i__ + (( - poles_dim1) << (1))] + dj); - } -/* L30: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - if ((z__[i__] == 0.f) || (poles[i__ + ((poles_dim1) << (1) - )] == 0.f)) { - rwork[i__] = 0.f; - } else { - rwork[i__] = poles[i__ + 
((poles_dim1) << (1))] * z__[ - i__] / (slamc3_(&poles[i__ + ((poles_dim1) << - (1))], &dsigjp) + difrj) / (poles[i__ + (( - poles_dim1) << (1))] + dj); - } -/* L40: */ - } - rwork[1] = -1.f; - temp = snrm2_(k, &rwork[1], &c__1); - -/* - Since B and BX are complex, the following call to SGEMV - is performed in two steps (real and imaginary parts). - - CALL SGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, - $ B( J, 1 ), LDB ) -*/ - - i__ = *k + ((*nrhs) << (1)); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = *k; - for (jrow = 1; jrow <= i__3; ++jrow) { - ++i__; - i__4 = jrow + jcol * bx_dim1; - rwork[i__] = bx[i__4].r; -/* L50: */ - } -/* L60: */ - } - sgemv_("T", k, nrhs, &c_b1011, &rwork[*k + 1 + ((*nrhs) << (1) - )], k, &rwork[1], &c__1, &c_b320, &rwork[*k + 1], & - c__1); - i__ = *k + ((*nrhs) << (1)); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = *k; - for (jrow = 1; jrow <= i__3; ++jrow) { - ++i__; - rwork[i__] = r_imag(&bx[jrow + jcol * bx_dim1]); -/* L70: */ - } -/* L80: */ - } - sgemv_("T", k, nrhs, &c_b1011, &rwork[*k + 1 + ((*nrhs) << (1) - )], k, &rwork[1], &c__1, &c_b320, &rwork[*k + 1 + * - nrhs], &c__1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = j + jcol * b_dim1; - i__4 = jcol + *k; - i__5 = jcol + *k + *nrhs; - q__1.r = rwork[i__4], q__1.i = rwork[i__5]; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L90: */ - } - clascl_("G", &c__0, &c__0, &temp, &c_b1011, &c__1, nrhs, &b[j - + b_dim1], ldb, info); -/* L100: */ - } - } - -/* Move the deflated rows of BX to B also. */ - - if (*k < max(m,n)) { - i__1 = n - *k; - clacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 - + b_dim1], ldb); - } - } else { - -/* - Apply back the right orthogonal transformations. - - Step (1R): apply back the new right singular vector matrix - to B. 
-*/ - - if (*k == 1) { - ccopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); - } else { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dsigj = poles[j + ((poles_dim1) << (1))]; - if (z__[j] == 0.f) { - rwork[j] = 0.f; - } else { - rwork[j] = -z__[j] / difl[j] / (dsigj + poles[j + - poles_dim1]) / difr[j + ((difr_dim1) << (1))]; - } - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - if (z__[j] == 0.f) { - rwork[i__] = 0.f; - } else { - r__1 = -poles[i__ + 1 + ((poles_dim1) << (1))]; - rwork[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr[ - i__ + difr_dim1]) / (dsigj + poles[i__ + - poles_dim1]) / difr[i__ + ((difr_dim1) << (1)) - ]; - } -/* L110: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - if (z__[j] == 0.f) { - rwork[i__] = 0.f; - } else { - r__1 = -poles[i__ + ((poles_dim1) << (1))]; - rwork[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[ - i__]) / (dsigj + poles[i__ + poles_dim1]) / - difr[i__ + ((difr_dim1) << (1))]; - } -/* L120: */ - } - -/* - Since B and BX are complex, the following call to SGEMV - is performed in two steps (real and imaginary parts). 
- - CALL SGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, - $ BX( J, 1 ), LDBX ) -*/ - - i__ = *k + ((*nrhs) << (1)); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = *k; - for (jrow = 1; jrow <= i__3; ++jrow) { - ++i__; - i__4 = jrow + jcol * b_dim1; - rwork[i__] = b[i__4].r; -/* L130: */ - } -/* L140: */ - } - sgemv_("T", k, nrhs, &c_b1011, &rwork[*k + 1 + ((*nrhs) << (1) - )], k, &rwork[1], &c__1, &c_b320, &rwork[*k + 1], & - c__1); - i__ = *k + ((*nrhs) << (1)); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = *k; - for (jrow = 1; jrow <= i__3; ++jrow) { - ++i__; - rwork[i__] = r_imag(&b[jrow + jcol * b_dim1]); -/* L150: */ - } -/* L160: */ - } - sgemv_("T", k, nrhs, &c_b1011, &rwork[*k + 1 + ((*nrhs) << (1) - )], k, &rwork[1], &c__1, &c_b320, &rwork[*k + 1 + * - nrhs], &c__1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = j + jcol * bx_dim1; - i__4 = jcol + *k; - i__5 = jcol + *k + *nrhs; - q__1.r = rwork[i__4], q__1.i = rwork[i__5]; - bx[i__3].r = q__1.r, bx[i__3].i = q__1.i; -/* L170: */ - } -/* L180: */ - } - } - -/* - Step (2R): if SQRE = 1, apply back the rotation that is - related to the right null space of the subproblem. -*/ - - if (*sqre == 1) { - ccopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); - csrot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, - s); - } - if (*k < max(m,n)) { - i__1 = n - *k; - clacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + - bx_dim1], ldbx); - } - -/* Step (3R): permute rows of B. */ - - ccopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); - if (*sqre == 1) { - ccopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); - } - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - ccopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], - ldb); -/* L190: */ - } - -/* Step (4R): apply back the Givens rotations performed. 
*/ - - for (i__ = *givptr; i__ >= 1; --i__) { - r__1 = -givnum[i__ + givnum_dim1]; - csrot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1], - ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[ - i__ + ((givnum_dim1) << (1))], &r__1); -/* L200: */ - } - } - - return 0; - -/* End of CLALS0 */ - -} /* clals0_ */ - -/* Subroutine */ int clalsa_(integer *icompq, integer *smlsiz, integer *n, - integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx, - real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr, - real *z__, real *poles, integer *givptr, integer *givcol, integer * - ldgcol, integer *perm, real *givnum, real *c__, real *s, real *rwork, - integer *iwork, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, - difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, - poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, - z_dim1, z_offset, b_dim1, b_offset, bx_dim1, bx_offset, i__1, - i__2, i__3, i__4, i__5, i__6; - complex q__1; - - /* Builtin functions */ - double r_imag(complex *); - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, - ndb1, nlp1, lvl2, nrp1, jcol, nlvl, sqre, jrow, jimag, jreal, - inode, ndiml; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer ndimr; - extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, - complex *, integer *), clals0_(integer *, integer *, integer *, - integer *, integer *, complex *, integer *, complex *, integer *, - integer *, integer *, integer *, integer *, real *, integer *, - real *, real *, real *, real *, integer *, real *, real *, real *, - integer *), xerbla_(char *, integer *), slasdt_(integer * - , integer *, integer *, integer *, integer *, integer *, 
integer * - ); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CLALSA is an itermediate step in solving the least squares problem - by computing the SVD of the coefficient matrix in compact form (The - singular vectors are computed as products of simple orthorgonal - matrices.). - - If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector - matrix of an upper bidiagonal matrix to the right hand side; and if - ICOMPQ = 1, CLALSA applies the right singular vector matrix to the - right hand side. The singular vector matrices were generated in - compact form by CLALSA. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether the left or the right singular vector - matrix is involved. - = 0: Left singular vector matrix - = 1: Right singular vector matrix - - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. - - N (input) INTEGER - The row and column dimensions of the upper bidiagonal matrix. - - NRHS (input) INTEGER - The number of columns of B and BX. NRHS must be at least 1. - - B (input) COMPLEX array, dimension ( LDB, NRHS ) - On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. - - LDB (input) INTEGER - The leading dimension of B in the calling subprogram. - LDB must be at least max(1,MAX( M, N ) ). - - BX (output) COMPLEX array, dimension ( LDBX, NRHS ) - On exit, the result of applying the left or right singular - vector matrix to B. - - LDBX (input) INTEGER - The leading dimension of BX. - - U (input) REAL array, dimension ( LDU, SMLSIZ ). - On entry, U contains the left singular vector matrices of all - subproblems at the bottom level. - - LDU (input) INTEGER, LDU = > N. 
- The leading dimension of arrays U, VT, DIFL, DIFR, - POLES, GIVNUM, and Z. - - VT (input) REAL array, dimension ( LDU, SMLSIZ+1 ). - On entry, VT' contains the right singular vector matrices of - all subproblems at the bottom level. - - K (input) INTEGER array, dimension ( N ). - - DIFL (input) REAL array, dimension ( LDU, NLVL ). - where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. - - DIFR (input) REAL array, dimension ( LDU, 2 * NLVL ). - On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record - distances between singular values on the I-th level and - singular values on the (I -1)-th level, and DIFR(*, 2 * I) - record the normalizing factors of the right singular vectors - matrices of subproblems on I-th level. - - Z (input) REAL array, dimension ( LDU, NLVL ). - On entry, Z(1, I) contains the components of the deflation- - adjusted updating row vector for subproblems on the I-th - level. - - POLES (input) REAL array, dimension ( LDU, 2 * NLVL ). - On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old - singular values involved in the secular equations on the I-th - level. - - GIVPTR (input) INTEGER array, dimension ( N ). - On entry, GIVPTR( I ) records the number of Givens - rotations performed on the I-th problem on the computation - tree. - - GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). - On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the - locations of Givens rotations performed on the I-th level on - the computation tree. - - LDGCOL (input) INTEGER, LDGCOL = > N. - The leading dimension of arrays GIVCOL and PERM. - - PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). - On entry, PERM(*, I) records permutations done on the I-th - level of the computation tree. - - GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ). - On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- - values of Givens rotations performed on the I-th level on the - computation tree. - - C (input) REAL array, dimension ( N ). 
- On entry, if the I-th subproblem is not square, - C( I ) contains the C-value of a Givens rotation related to - the right null space of the I-th subproblem. - - S (input) REAL array, dimension ( N ). - On entry, if the I-th subproblem is not square, - S( I ) contains the S-value of a Givens rotation related to - the right null space of the I-th subproblem. - - RWORK (workspace) REAL array, dimension at least - max ( N, (SMLSZ+1)*NRHS*3 ). - - IWORK (workspace) INTEGER array. - The dimension must be at least 3 * N - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - bx_dim1 = *ldbx; - bx_offset = 1 + bx_dim1; - bx -= bx_offset; - givnum_dim1 = *ldu; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - poles_dim1 = *ldu; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - z_dim1 = *ldu; - z_offset = 1 + z_dim1; - z__ -= z_offset; - difr_dim1 = *ldu; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - difl_dim1 = *ldu; - difl_offset = 1 + difl_dim1; - difl -= difl_offset; - vt_dim1 = *ldu; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - --k; - --givptr; - perm_dim1 = *ldgcol; - perm_offset = 1 + perm_dim1; - perm -= perm_offset; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - --c__; - --s; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*smlsiz < 3) { - *info = -2; - } else if (*n < *smlsiz) { - *info = -3; - } else 
if (*nrhs < 1) { - *info = -4; - } else if (*ldb < *n) { - *info = -6; - } else if (*ldbx < *n) { - *info = -8; - } else if (*ldu < *n) { - *info = -10; - } else if (*ldgcol < *n) { - *info = -19; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLALSA", &i__1); - return 0; - } - -/* Book-keeping and setting up the computation tree. */ - - inode = 1; - ndiml = inode + *n; - ndimr = ndiml + *n; - - slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], - smlsiz); - -/* - The following code applies back the left singular vector factors. - For applying back the right singular vector factors, go to 170. -*/ - - if (*icompq == 1) { - goto L170; - } - -/* - The nodes on the bottom level of the tree were solved - by SLASDQ. The corresponding left and right singular vector - matrices are in explicit form. First apply back the left - singular vector matrices. -*/ - - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - -/* - IC : center row of each node - NL : number of rows of left subproblem - NR : number of rows of right subproblem - NLF: starting row of the left subproblem - NRF: starting row of the right subproblem -*/ - - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nr = iwork[ndimr + i1]; - nlf = ic - nl; - nrf = ic + 1; - -/* - Since B and BX are complex, the following call to SGEMM - is performed in two steps (real and imaginary parts). 
- - CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, - $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) -*/ - - j = (nl * *nrhs) << (1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nlf + nl - 1; - for (jrow = nlf; jrow <= i__3; ++jrow) { - ++j; - i__4 = jrow + jcol * b_dim1; - rwork[j] = b[i__4].r; -/* L10: */ - } -/* L20: */ - } - sgemm_("T", "N", &nl, nrhs, &nl, &c_b1011, &u[nlf + u_dim1], ldu, & - rwork[((nl * *nrhs) << (1)) + 1], &nl, &c_b320, &rwork[1], & - nl); - j = (nl * *nrhs) << (1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nlf + nl - 1; - for (jrow = nlf; jrow <= i__3; ++jrow) { - ++j; - rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); -/* L30: */ - } -/* L40: */ - } - sgemm_("T", "N", &nl, nrhs, &nl, &c_b1011, &u[nlf + u_dim1], ldu, & - rwork[((nl * *nrhs) << (1)) + 1], &nl, &c_b320, &rwork[nl * * - nrhs + 1], &nl); - jreal = 0; - jimag = nl * *nrhs; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nlf + nl - 1; - for (jrow = nlf; jrow <= i__3; ++jrow) { - ++jreal; - ++jimag; - i__4 = jrow + jcol * bx_dim1; - i__5 = jreal; - i__6 = jimag; - q__1.r = rwork[i__5], q__1.i = rwork[i__6]; - bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; -/* L50: */ - } -/* L60: */ - } - -/* - Since B and BX are complex, the following call to SGEMM - is performed in two steps (real and imaginary parts). 
- - CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, - $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) -*/ - - j = (nr * *nrhs) << (1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nrf + nr - 1; - for (jrow = nrf; jrow <= i__3; ++jrow) { - ++j; - i__4 = jrow + jcol * b_dim1; - rwork[j] = b[i__4].r; -/* L70: */ - } -/* L80: */ - } - sgemm_("T", "N", &nr, nrhs, &nr, &c_b1011, &u[nrf + u_dim1], ldu, & - rwork[((nr * *nrhs) << (1)) + 1], &nr, &c_b320, &rwork[1], & - nr); - j = (nr * *nrhs) << (1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nrf + nr - 1; - for (jrow = nrf; jrow <= i__3; ++jrow) { - ++j; - rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); -/* L90: */ - } -/* L100: */ - } - sgemm_("T", "N", &nr, nrhs, &nr, &c_b1011, &u[nrf + u_dim1], ldu, & - rwork[((nr * *nrhs) << (1)) + 1], &nr, &c_b320, &rwork[nr * * - nrhs + 1], &nr); - jreal = 0; - jimag = nr * *nrhs; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nrf + nr - 1; - for (jrow = nrf; jrow <= i__3; ++jrow) { - ++jreal; - ++jimag; - i__4 = jrow + jcol * bx_dim1; - i__5 = jreal; - i__6 = jimag; - q__1.r = rwork[i__5], q__1.i = rwork[i__6]; - bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; -/* L110: */ - } -/* L120: */ - } - -/* L130: */ - } - -/* - Next copy the rows of B that correspond to unchanged rows - in the bidiagonal matrix to BX. -*/ - - i__1 = nd; - for (i__ = 1; i__ <= i__1; ++i__) { - ic = iwork[inode + i__ - 1]; - ccopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); -/* L140: */ - } - -/* - Finally go through the left singular vector matrices of all - the other subproblems bottom-up on the tree. 
-*/ - - j = pow_ii(&c__2, &nlvl); - sqre = 0; - - for (lvl = nlvl; lvl >= 1; --lvl) { - lvl2 = ((lvl) << (1)) - 1; - -/* - find the first node LF and last node LL on - the current level LVL -*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); - ll = ((lf) << (1)) - 1; - } - i__1 = ll; - for (i__ = lf; i__ <= i__1; ++i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - --j; - clals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & - b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & - givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & - givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * - poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + - lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ - j], &s[j], &rwork[1], info); -/* L150: */ - } -/* L160: */ - } - goto L330; - -/* ICOMPQ = 1: applying back the right singular vector factors. */ - -L170: - -/* - First now go through the right singular vector matrices of all - the tree nodes top-down. -*/ - - j = 0; - i__1 = nlvl; - for (lvl = 1; lvl <= i__1; ++lvl) { - lvl2 = ((lvl) << (1)) - 1; - -/* - Find the first node LF and last node LL on - the current level LVL. 
-*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__2 = lvl - 1; - lf = pow_ii(&c__2, &i__2); - ll = ((lf) << (1)) - 1; - } - i__2 = lf; - for (i__ = ll; i__ >= i__2; --i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - if (i__ == ll) { - sqre = 0; - } else { - sqre = 1; - } - ++j; - clals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ - nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & - givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & - givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * - poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + - lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ - j], &s[j], &rwork[1], info); -/* L180: */ - } -/* L190: */ - } - -/* - The nodes on the bottom level of the tree were solved - by SLASDQ. The corresponding right singular vector - matrices are in explicit form. Apply them back. -*/ - - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nr = iwork[ndimr + i1]; - nlp1 = nl + 1; - if (i__ == nd) { - nrp1 = nr; - } else { - nrp1 = nr + 1; - } - nlf = ic - nl; - nrf = ic + 1; - -/* - Since B and BX are complex, the following call to SGEMM is - performed in two steps (real and imaginary parts). 
- - CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, - $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) -*/ - - j = (nlp1 * *nrhs) << (1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nlf + nlp1 - 1; - for (jrow = nlf; jrow <= i__3; ++jrow) { - ++j; - i__4 = jrow + jcol * b_dim1; - rwork[j] = b[i__4].r; -/* L200: */ - } -/* L210: */ - } - sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1011, &vt[nlf + vt_dim1], - ldu, &rwork[((nlp1 * *nrhs) << (1)) + 1], &nlp1, &c_b320, & - rwork[1], &nlp1); - j = (nlp1 * *nrhs) << (1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nlf + nlp1 - 1; - for (jrow = nlf; jrow <= i__3; ++jrow) { - ++j; - rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); -/* L220: */ - } -/* L230: */ - } - sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1011, &vt[nlf + vt_dim1], - ldu, &rwork[((nlp1 * *nrhs) << (1)) + 1], &nlp1, &c_b320, & - rwork[nlp1 * *nrhs + 1], &nlp1); - jreal = 0; - jimag = nlp1 * *nrhs; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nlf + nlp1 - 1; - for (jrow = nlf; jrow <= i__3; ++jrow) { - ++jreal; - ++jimag; - i__4 = jrow + jcol * bx_dim1; - i__5 = jreal; - i__6 = jimag; - q__1.r = rwork[i__5], q__1.i = rwork[i__6]; - bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; -/* L240: */ - } -/* L250: */ - } - -/* - Since B and BX are complex, the following call to SGEMM is - performed in two steps (real and imaginary parts). 
- - CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, - $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) -*/ - - j = (nrp1 * *nrhs) << (1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nrf + nrp1 - 1; - for (jrow = nrf; jrow <= i__3; ++jrow) { - ++j; - i__4 = jrow + jcol * b_dim1; - rwork[j] = b[i__4].r; -/* L260: */ - } -/* L270: */ - } - sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1011, &vt[nrf + vt_dim1], - ldu, &rwork[((nrp1 * *nrhs) << (1)) + 1], &nrp1, &c_b320, & - rwork[1], &nrp1); - j = (nrp1 * *nrhs) << (1); - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nrf + nrp1 - 1; - for (jrow = nrf; jrow <= i__3; ++jrow) { - ++j; - rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); -/* L280: */ - } -/* L290: */ - } - sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1011, &vt[nrf + vt_dim1], - ldu, &rwork[((nrp1 * *nrhs) << (1)) + 1], &nrp1, &c_b320, & - rwork[nrp1 * *nrhs + 1], &nrp1); - jreal = 0; - jimag = nrp1 * *nrhs; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = nrf + nrp1 - 1; - for (jrow = nrf; jrow <= i__3; ++jrow) { - ++jreal; - ++jimag; - i__4 = jrow + jcol * bx_dim1; - i__5 = jreal; - i__6 = jimag; - q__1.r = rwork[i__5], q__1.i = rwork[i__6]; - bx[i__4].r = q__1.r, bx[i__4].i = q__1.i; -/* L300: */ - } -/* L310: */ - } - -/* L320: */ - } - -L330: - - return 0; - -/* End of CLALSA */ - -} /* clalsa_ */ - -/* Subroutine */ int clalsd_(char *uplo, integer *smlsiz, integer *n, integer - *nrhs, real *d__, real *e, complex *b, integer *ldb, real *rcond, - integer *rank, complex *work, real *rwork, integer *iwork, integer * - info) -{ - /* System generated locals */ - integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6; - real r__1; - complex q__1; - - /* Builtin functions */ - double r_imag(complex *), log(doublereal), r_sign(real *, real *); - - /* Local variables */ - static integer c__, i__, j, k; - static real r__; - static integer s, u, z__; - static real cs; - static integer bx; - static real 
sn; - static integer st, vt, nm1, st1; - static real eps; - static integer iwk; - static real tol; - static integer difl, difr, jcol, irwb, perm, nsub, nlvl, sqre, bxst, jrow, - irwu, jimag, jreal; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer irwib; - extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, - complex *, integer *); - static integer poles, sizei, irwrb, nsize; - extern /* Subroutine */ int csrot_(integer *, complex *, integer *, - complex *, integer *, real *, real *); - static integer irwvt, icmpq1, icmpq2; - extern /* Subroutine */ int clalsa_(integer *, integer *, integer *, - integer *, complex *, integer *, complex *, integer *, real *, - integer *, real *, integer *, real *, real *, real *, real *, - integer *, integer *, integer *, integer *, real *, real *, real * - , real *, integer *, integer *), clascl_(char *, integer *, - integer *, real *, real *, integer *, integer *, complex *, - integer *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int slasda_(integer *, integer *, integer *, - integer *, real *, real *, real *, integer *, real *, integer *, - real *, real *, real *, real *, integer *, integer *, integer *, - integer *, real *, real *, real *, real *, integer *, integer *), - clacpy_(char *, integer *, integer *, complex *, integer *, - complex *, integer *), claset_(char *, integer *, integer - *, complex *, complex *, complex *, integer *), xerbla_( - char *, integer *), slascl_(char *, integer *, integer *, - real *, real *, integer *, integer *, real *, integer *, integer * - ); - extern integer isamax_(integer *, real *, integer *); - static integer givcol; - extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer - *, integer *, integer *, real *, real *, real *, integer *, real * - , integer *, real *, integer *, real *, integer *), 
- slaset_(char *, integer *, integer *, real *, real *, real *, - integer *), slartg_(real *, real *, real *, real *, real * - ); - static real orgnrm; - static integer givnum; - extern doublereal slanst_(char *, integer *, real *, real *); - extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); - static integer givptr, nrwork, irwwrk, smlszp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - CLALSD uses the singular value decomposition of A to solve the least - squares problem of finding X to minimize the Euclidean norm of each - column of A*X-B, where A is N-by-N upper bidiagonal, and X and B - are N-by-NRHS. The solution X overwrites B. - - The singular values of A smaller than RCOND times the largest - singular value are treated as zero in solving the least squares - problem; in this case a minimum norm solution is returned. - The actual singular values are returned in D in ascending order. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': D and E define an upper bidiagonal matrix. - = 'L': D and E define a lower bidiagonal matrix. - - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. - - N (input) INTEGER - The dimension of the bidiagonal matrix. N >= 0. - - NRHS (input) INTEGER - The number of columns of B. NRHS must be at least 1. - - D (input/output) REAL array, dimension (N) - On entry D contains the main diagonal of the bidiagonal - matrix. 
On exit, if INFO = 0, D contains its singular values. - - E (input) REAL array, dimension (N-1) - Contains the super-diagonal entries of the bidiagonal matrix. - On exit, E has been destroyed. - - B (input/output) COMPLEX array, dimension (LDB,NRHS) - On input, B contains the right hand sides of the least - squares problem. On output, B contains the solution X. - - LDB (input) INTEGER - The leading dimension of B in the calling subprogram. - LDB must be at least max(1,N). - - RCOND (input) REAL - The singular values of A less than or equal to RCOND times - the largest singular value are treated as zero in solving - the least squares problem. If RCOND is negative, - machine precision is used instead. - For example, if diag(S)*X=B were the least squares problem, - where diag(S) is a diagonal matrix of singular values, the - solution would be X(i) = B(i) / S(i) if S(i) is greater than - RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to - RCOND*max(S). - - RANK (output) INTEGER - The number of singular values of A greater than RCOND times - the largest singular value. - - WORK (workspace) COMPLEX array, dimension at least - (N * NRHS). - - RWORK (workspace) REAL array, dimension at least - (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2), - where - NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) - - IWORK (workspace) INTEGER array, dimension at least - (3*N*NLVL + 11*N). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value while - working on the submatrix lying in rows and columns - INFO/(N+1) through MOD(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - --e; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -3; - } else if (*nrhs < 1) { - *info = -4; - } else if ((*ldb < 1) || (*ldb < *n)) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLALSD", &i__1); - return 0; - } - - eps = slamch_("Epsilon"); - -/* Set up the tolerance. */ - - if ((*rcond <= 0.f) || (*rcond >= 1.f)) { - *rcond = eps; - } - - *rank = 0; - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } else if (*n == 1) { - if (d__[1] == 0.f) { - claset_("A", &c__1, nrhs, &c_b55, &c_b55, &b[b_offset], ldb); - } else { - *rank = 1; - clascl_("G", &c__0, &c__0, &d__[1], &c_b1011, &c__1, nrhs, &b[ - b_offset], ldb, info); - d__[1] = dabs(d__[1]); - } - return 0; - } - -/* Rotate the matrix if it is lower bidiagonal. */ - - if (*(unsigned char *)uplo == 'L') { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (*nrhs == 1) { - csrot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & - c__1, &cs, &sn); - } else { - rwork[((i__) << (1)) - 1] = cs; - rwork[i__ * 2] = sn; - } -/* L10: */ - } - if (*nrhs > 1) { - i__1 = *nrhs; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *n - 1; - for (j = 1; j <= i__2; ++j) { - cs = rwork[((j) << (1)) - 1]; - sn = rwork[j * 2]; - csrot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ - * b_dim1], &c__1, &cs, &sn); -/* L20: */ - } -/* L30: */ - } - } - } - -/* Scale. 
*/ - - nm1 = *n - 1; - orgnrm = slanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.f) { - claset_("A", n, nrhs, &c_b55, &c_b55, &b[b_offset], ldb); - return 0; - } - - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, &c__1, &d__[1], n, info); - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &nm1, &c__1, &e[1], &nm1, - info); - -/* - If N is smaller than the minimum divide size SMLSIZ, then solve - the problem with another solver. -*/ - - if (*n <= *smlsiz) { - irwu = 1; - irwvt = irwu + *n * *n; - irwwrk = irwvt + *n * *n; - irwrb = irwwrk; - irwib = irwrb + *n * *nrhs; - irwb = irwib + *n * *nrhs; - slaset_("A", n, n, &c_b320, &c_b1011, &rwork[irwu], n); - slaset_("A", n, n, &c_b320, &c_b1011, &rwork[irwvt], n); - slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &rwork[irwvt], n, - &rwork[irwu], n, &rwork[irwwrk], &c__1, &rwork[irwwrk], info); - if (*info != 0) { - return 0; - } - -/* - In the real version, B is passed to SLASDQ and multiplied - internally by Q'. Here B is complex and that product is - computed below in two steps (real and imaginary parts). 
-*/ - - j = irwb - 1; - i__1 = *nrhs; - for (jcol = 1; jcol <= i__1; ++jcol) { - i__2 = *n; - for (jrow = 1; jrow <= i__2; ++jrow) { - ++j; - i__3 = jrow + jcol * b_dim1; - rwork[j] = b[i__3].r; -/* L40: */ - } -/* L50: */ - } - sgemm_("T", "N", n, nrhs, n, &c_b1011, &rwork[irwu], n, &rwork[irwb], - n, &c_b320, &rwork[irwrb], n); - j = irwb - 1; - i__1 = *nrhs; - for (jcol = 1; jcol <= i__1; ++jcol) { - i__2 = *n; - for (jrow = 1; jrow <= i__2; ++jrow) { - ++j; - rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); -/* L60: */ - } -/* L70: */ - } - sgemm_("T", "N", n, nrhs, n, &c_b1011, &rwork[irwu], n, &rwork[irwb], - n, &c_b320, &rwork[irwib], n); - jreal = irwrb - 1; - jimag = irwib - 1; - i__1 = *nrhs; - for (jcol = 1; jcol <= i__1; ++jcol) { - i__2 = *n; - for (jrow = 1; jrow <= i__2; ++jrow) { - ++jreal; - ++jimag; - i__3 = jrow + jcol * b_dim1; - i__4 = jreal; - i__5 = jimag; - q__1.r = rwork[i__4], q__1.i = rwork[i__5]; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L80: */ - } -/* L90: */ - } - - tol = *rcond * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1)); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (d__[i__] <= tol) { - claset_("A", &c__1, nrhs, &c_b55, &c_b55, &b[i__ + b_dim1], - ldb); - } else { - clascl_("G", &c__0, &c__0, &d__[i__], &c_b1011, &c__1, nrhs, & - b[i__ + b_dim1], ldb, info); - ++(*rank); - } -/* L100: */ - } - -/* - Since B is complex, the following call to SGEMM is performed - in two steps (real and imaginary parts). That is for V * B - (in the real version of the code V' is stored in WORK). 
- - CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, - $ WORK( NWORK ), N ) -*/ - - j = irwb - 1; - i__1 = *nrhs; - for (jcol = 1; jcol <= i__1; ++jcol) { - i__2 = *n; - for (jrow = 1; jrow <= i__2; ++jrow) { - ++j; - i__3 = jrow + jcol * b_dim1; - rwork[j] = b[i__3].r; -/* L110: */ - } -/* L120: */ - } - sgemm_("T", "N", n, nrhs, n, &c_b1011, &rwork[irwvt], n, &rwork[irwb], - n, &c_b320, &rwork[irwrb], n); - j = irwb - 1; - i__1 = *nrhs; - for (jcol = 1; jcol <= i__1; ++jcol) { - i__2 = *n; - for (jrow = 1; jrow <= i__2; ++jrow) { - ++j; - rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); -/* L130: */ - } -/* L140: */ - } - sgemm_("T", "N", n, nrhs, n, &c_b1011, &rwork[irwvt], n, &rwork[irwb], - n, &c_b320, &rwork[irwib], n); - jreal = irwrb - 1; - jimag = irwib - 1; - i__1 = *nrhs; - for (jcol = 1; jcol <= i__1; ++jcol) { - i__2 = *n; - for (jrow = 1; jrow <= i__2; ++jrow) { - ++jreal; - ++jimag; - i__3 = jrow + jcol * b_dim1; - i__4 = jreal; - i__5 = jimag; - q__1.r = rwork[i__4], q__1.i = rwork[i__5]; - b[i__3].r = q__1.r, b[i__3].i = q__1.i; -/* L150: */ - } -/* L160: */ - } - -/* Unscale. */ - - slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, n, &c__1, &d__[1], n, - info); - slasrt_("D", n, &d__[1], info); - clascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, nrhs, &b[b_offset], - ldb, info); - - return 0; - } - -/* Book-keeping and setting up some constants. 
*/ - - nlvl = (integer) (log((real) (*n) / (real) (*smlsiz + 1)) / log(2.f)) + 1; - - smlszp = *smlsiz + 1; - - u = 1; - vt = *smlsiz * *n + 1; - difl = vt + smlszp * *n; - difr = difl + nlvl * *n; - z__ = difr + ((nlvl * *n) << (1)); - c__ = z__ + nlvl * *n; - s = c__ + *n; - poles = s + *n; - givnum = poles + ((nlvl) << (1)) * *n; - nrwork = givnum + ((nlvl) << (1)) * *n; - bx = 1; - - irwrb = nrwork; - irwib = irwrb + *smlsiz * *nrhs; - irwb = irwib + *smlsiz * *nrhs; - - sizei = *n + 1; - k = sizei + *n; - givptr = k + *n; - perm = givptr + *n; - givcol = perm + nlvl * *n; - iwk = givcol + ((nlvl * *n) << (1)); - - st = 1; - sqre = 0; - icmpq1 = 1; - icmpq2 = 0; - nsub = 0; - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((r__1 = d__[i__], dabs(r__1)) < eps) { - d__[i__] = r_sign(&eps, &d__[i__]); - } -/* L170: */ - } - - i__1 = nm1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (((r__1 = e[i__], dabs(r__1)) < eps) || (i__ == nm1)) { - ++nsub; - iwork[nsub] = st; - -/* - Subproblem found. First determine its size and then - apply divide and conquer on it. -*/ - - if (i__ < nm1) { - -/* A subproblem with E(I) small for I < NM1. */ - - nsize = i__ - st + 1; - iwork[sizei + nsub - 1] = nsize; - } else if ((r__1 = e[i__], dabs(r__1)) >= eps) { - -/* A subproblem with E(NM1) not too small but I = NM1. */ - - nsize = *n - st + 1; - iwork[sizei + nsub - 1] = nsize; - } else { - -/* - A subproblem with E(NM1) small. This implies an - 1-by-1 subproblem at D(N), which is not solved - explicitly. -*/ - - nsize = i__ - st + 1; - iwork[sizei + nsub - 1] = nsize; - ++nsub; - iwork[nsub] = *n; - iwork[sizei + nsub - 1] = 1; - ccopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); - } - st1 = st - 1; - if (nsize == 1) { - -/* - This is a 1-by-1 subproblem and is not solved - explicitly. -*/ - - ccopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); - } else if (nsize <= *smlsiz) { - -/* This is a small subproblem and is solved by SLASDQ. 
*/ - - slaset_("A", &nsize, &nsize, &c_b320, &c_b1011, &rwork[vt + - st1], n); - slaset_("A", &nsize, &nsize, &c_b320, &c_b1011, &rwork[u + - st1], n); - slasdq_("U", &c__0, &nsize, &nsize, &nsize, &c__0, &d__[st], & - e[st], &rwork[vt + st1], n, &rwork[u + st1], n, & - rwork[nrwork], &c__1, &rwork[nrwork], info) - ; - if (*info != 0) { - return 0; - } - -/* - In the real version, B is passed to SLASDQ and multiplied - internally by Q'. Here B is complex and that product is - computed below in two steps (real and imaginary parts). -*/ - - j = irwb - 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = st + nsize - 1; - for (jrow = st; jrow <= i__3; ++jrow) { - ++j; - i__4 = jrow + jcol * b_dim1; - rwork[j] = b[i__4].r; -/* L180: */ - } -/* L190: */ - } - sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1011, &rwork[u + - st1], n, &rwork[irwb], &nsize, &c_b320, &rwork[irwrb], - &nsize); - j = irwb - 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = st + nsize - 1; - for (jrow = st; jrow <= i__3; ++jrow) { - ++j; - rwork[j] = r_imag(&b[jrow + jcol * b_dim1]); -/* L200: */ - } -/* L210: */ - } - sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1011, &rwork[u + - st1], n, &rwork[irwb], &nsize, &c_b320, &rwork[irwib], - &nsize); - jreal = irwrb - 1; - jimag = irwib - 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = st + nsize - 1; - for (jrow = st; jrow <= i__3; ++jrow) { - ++jreal; - ++jimag; - i__4 = jrow + jcol * b_dim1; - i__5 = jreal; - i__6 = jimag; - q__1.r = rwork[i__5], q__1.i = rwork[i__6]; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; -/* L220: */ - } -/* L230: */ - } - - clacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + - st1], n); - } else { - -/* A large problem. Solve it using divide and conquer. 
*/ - - slasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & - rwork[u + st1], n, &rwork[vt + st1], &iwork[k + st1], - &rwork[difl + st1], &rwork[difr + st1], &rwork[z__ + - st1], &rwork[poles + st1], &iwork[givptr + st1], & - iwork[givcol + st1], n, &iwork[perm + st1], &rwork[ - givnum + st1], &rwork[c__ + st1], &rwork[s + st1], & - rwork[nrwork], &iwork[iwk], info); - if (*info != 0) { - return 0; - } - bxst = bx + st1; - clalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & - work[bxst], n, &rwork[u + st1], n, &rwork[vt + st1], & - iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1] - , &rwork[z__ + st1], &rwork[poles + st1], &iwork[ - givptr + st1], &iwork[givcol + st1], n, &iwork[perm + - st1], &rwork[givnum + st1], &rwork[c__ + st1], &rwork[ - s + st1], &rwork[nrwork], &iwork[iwk], info); - if (*info != 0) { - return 0; - } - } - st = i__ + 1; - } -/* L240: */ - } - -/* Apply the singular values and treat the tiny ones as zero. */ - - tol = *rcond * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1)); - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* - Some of the elements in D can be negative because 1-by-1 - subproblems were not solved explicitly. -*/ - - if ((r__1 = d__[i__], dabs(r__1)) <= tol) { - claset_("A", &c__1, nrhs, &c_b55, &c_b55, &work[bx + i__ - 1], n); - } else { - ++(*rank); - clascl_("G", &c__0, &c__0, &d__[i__], &c_b1011, &c__1, nrhs, & - work[bx + i__ - 1], n, info); - } - d__[i__] = (r__1 = d__[i__], dabs(r__1)); -/* L250: */ - } - -/* Now apply back the right singular vectors. */ - - icmpq2 = 1; - i__1 = nsub; - for (i__ = 1; i__ <= i__1; ++i__) { - st = iwork[i__]; - st1 = st - 1; - nsize = iwork[sizei + i__ - 1]; - bxst = bx + st1; - if (nsize == 1) { - ccopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); - } else if (nsize <= *smlsiz) { - -/* - Since B and BX are complex, the following call to SGEMM - is performed in two steps (real and imaginary parts). 
- - CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, - $ RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO, - $ B( ST, 1 ), LDB ) -*/ - - j = bxst - *n - 1; - jreal = irwb - 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - j += *n; - i__3 = nsize; - for (jrow = 1; jrow <= i__3; ++jrow) { - ++jreal; - i__4 = j + jrow; - rwork[jreal] = work[i__4].r; -/* L260: */ - } -/* L270: */ - } - sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1011, &rwork[vt + st1], - n, &rwork[irwb], &nsize, &c_b320, &rwork[irwrb], &nsize); - j = bxst - *n - 1; - jimag = irwb - 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - j += *n; - i__3 = nsize; - for (jrow = 1; jrow <= i__3; ++jrow) { - ++jimag; - rwork[jimag] = r_imag(&work[j + jrow]); -/* L280: */ - } -/* L290: */ - } - sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1011, &rwork[vt + st1], - n, &rwork[irwb], &nsize, &c_b320, &rwork[irwib], &nsize); - jreal = irwrb - 1; - jimag = irwib - 1; - i__2 = *nrhs; - for (jcol = 1; jcol <= i__2; ++jcol) { - i__3 = st + nsize - 1; - for (jrow = st; jrow <= i__3; ++jrow) { - ++jreal; - ++jimag; - i__4 = jrow + jcol * b_dim1; - i__5 = jreal; - i__6 = jimag; - q__1.r = rwork[i__5], q__1.i = rwork[i__6]; - b[i__4].r = q__1.r, b[i__4].i = q__1.i; -/* L300: */ - } -/* L310: */ - } - } else { - clalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + - b_dim1], ldb, &rwork[u + st1], n, &rwork[vt + st1], & - iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1], & - rwork[z__ + st1], &rwork[poles + st1], &iwork[givptr + - st1], &iwork[givcol + st1], n, &iwork[perm + st1], &rwork[ - givnum + st1], &rwork[c__ + st1], &rwork[s + st1], &rwork[ - nrwork], &iwork[iwk], info); - if (*info != 0) { - return 0; - } - } -/* L320: */ - } - -/* Unscale and sort the singular values. 
*/ - - slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, n, &c__1, &d__[1], n, info); - slasrt_("D", n, &d__[1], info); - clascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, nrhs, &b[b_offset], ldb, - info); - - return 0; - -/* End of CLALSD */ - -} /* clalsd_ */ - -doublereal clange_(char *norm, integer *m, integer *n, complex *a, integer * - lda, real *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - real ret_val, r__1, r__2; - - /* Builtin functions */ - double c_abs(complex *), sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static real sum, scale; - extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int classq_(integer *, complex *, integer *, real - *, real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLANGE returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - complex matrix A. - - Description - =========== - - CLANGE returns the value - - CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in CLANGE as described - above. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. When M = 0, - CLANGE is set to zero. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. When N = 0, - CLANGE is set to zero. 
- - A (input) COMPLEX array, dimension (LDA,N) - The m by n matrix A. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(M,1). - - WORK (workspace) REAL array, dimension (LWORK), - where LWORK >= M when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (min(*m,*n) == 0) { - value = 0.f; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); - value = dmax(r__1,r__2); -/* L10: */ - } -/* L20: */ - } - } else if ((lsame_(norm, "O")) || (*(unsigned char * - )norm == '1')) { - -/* Find norm1(A). */ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.f; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - sum += c_abs(&a[i__ + j * a_dim1]); -/* L30: */ - } - value = dmax(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). */ - - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += c_abs(&a[i__ + j * a_dim1]); -/* L60: */ - } -/* L70: */ - } - value = 0.f; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); -/* L80: */ - } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { - -/* Find normF(A). 
*/ - - scale = 0.f; - sum = 1.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - classq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of CLANGE */ - -} /* clange_ */ - -doublereal clanhe_(char *norm, char *uplo, integer *n, complex *a, integer * - lda, real *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - real ret_val, r__1, r__2, r__3; - - /* Builtin functions */ - double c_abs(complex *), sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static real sum, absa, scale; - extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int classq_(integer *, complex *, integer *, real - *, real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLANHE returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - complex hermitian matrix A. - - Description - =========== - - CLANHE returns the value - - CLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in CLANHE as described - above. - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - hermitian matrix A is to be referenced. 
- = 'U': Upper triangular part of A is referenced - = 'L': Lower triangular part of A is referenced - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, CLANHE is - set to zero. - - A (input) COMPLEX array, dimension (LDA,N) - The hermitian matrix A. If UPLO = 'U', the leading n by n - upper triangular part of A contains the upper triangular part - of the matrix A, and the strictly lower triangular part of A - is not referenced. If UPLO = 'L', the leading n by n lower - triangular part of A contains the lower triangular part of - the matrix A, and the strictly upper triangular part of A is - not referenced. Note that the imaginary parts of the diagonal - elements need not be set and are assumed to be zero. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) REAL array, dimension (LWORK), - where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, - WORK is not referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.f; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). 
*/ - - value = 0.f; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); - value = dmax(r__1,r__2); -/* L10: */ - } -/* Computing MAX */ - i__2 = j + j * a_dim1; - r__2 = value, r__3 = (r__1 = a[i__2].r, dabs(r__1)); - value = dmax(r__2,r__3); -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__2 = j + j * a_dim1; - r__2 = value, r__3 = (r__1 = a[i__2].r, dabs(r__1)); - value = dmax(r__2,r__3); - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); - value = dmax(r__1,r__2); -/* L30: */ - } -/* L40: */ - } - } - } else if (((lsame_(norm, "I")) || (lsame_(norm, - "O"))) || (*(unsigned char *)norm == '1')) { - -/* Find normI(A) ( = norm1(A), since A is hermitian). */ - - value = 0.f; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.f; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - absa = c_abs(&a[i__ + j * a_dim1]); - sum += absa; - work[i__] += absa; -/* L50: */ - } - i__2 = j + j * a_dim1; - work[j] = sum + (r__1 = a[i__2].r, dabs(r__1)); -/* L60: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); -/* L70: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; -/* L80: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j + j * a_dim1; - sum = work[j] + (r__1 = a[i__2].r, dabs(r__1)); - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - absa = c_abs(&a[i__ + j * a_dim1]); - sum += absa; - work[i__] += absa; -/* L90: */ - } - value = dmax(value,sum); -/* L100: */ - } - } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { - -/* Find normF(A). 
*/ - - scale = 0.f; - sum = 1.f; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - i__2 = j - 1; - classq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L110: */ - } - } else { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - i__2 = *n - j; - classq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); -/* L120: */ - } - } - sum *= 2; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - if (a[i__2].r != 0.f) { - i__2 = i__ + i__ * a_dim1; - absa = (r__1 = a[i__2].r, dabs(r__1)); - if (scale < absa) { -/* Computing 2nd power */ - r__1 = scale / absa; - sum = sum * (r__1 * r__1) + 1.f; - scale = absa; - } else { -/* Computing 2nd power */ - r__1 = absa / scale; - sum += r__1 * r__1; - } - } -/* L130: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of CLANHE */ - -} /* clanhe_ */ - -doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * - work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - real ret_val, r__1, r__2; - - /* Builtin functions */ - double c_abs(complex *), sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static real sum, scale; - extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int classq_(integer *, complex *, integer *, real - *, real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLANHS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - Hessenberg matrix A. 
- - Description - =========== - - CLANHS returns the value - - CLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in CLANHS as described - above. - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, CLANHS is - set to zero. - - A (input) COMPLEX array, dimension (LDA,N) - The n by n upper Hessenberg matrix A; the part of A below the - first sub-diagonal is not referenced. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) REAL array, dimension (LWORK), - where LWORK >= N when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.f; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); - value = dmax(r__1,r__2); -/* L10: */ - } -/* L20: */ - } - } else if ((lsame_(norm, "O")) || (*(unsigned char * - )norm == '1')) { - -/* Find norm1(A). 
*/ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.f; -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - sum += c_abs(&a[i__ + j * a_dim1]); -/* L30: */ - } - value = dmax(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += c_abs(&a[i__ + j * a_dim1]); -/* L60: */ - } -/* L70: */ - } - value = 0.f; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); -/* L80: */ - } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { - -/* Find normF(A). */ - - scale = 0.f; - sum = 1.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - classq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of CLANHS */ - -} /* clanhs_ */ - -/* Subroutine */ int clarcm_(integer *m, integer *n, real *a, integer *lda, - complex *b, integer *ldb, complex *c__, integer *ldc, real *rwork) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3, i__4, i__5; - real r__1; - complex q__1; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CLARCM performs a very simple matrix-matrix multiplication: - C := A * B, - where A is M by M and real; B is M by N and complex; - C is M by N and complex. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A and of the matrix C. - M >= 0. - - N (input) INTEGER - The number of columns and rows of the matrix B and - the number of columns of the matrix C. - N >= 0. - - A (input) REAL array, dimension (LDA, M) - A contains the M by M matrix A. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >=max(1,M). - - B (input) REAL array, dimension (LDB, N) - B contains the M by N matrix B. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >=max(1,M). - - C (input) COMPLEX array, dimension (LDC, N) - C contains the M by N matrix C. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >=max(1,M). - - RWORK (workspace) REAL array, dimension (2*M*N) - - ===================================================================== - - - Quick return if possible. 
-*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --rwork; - - /* Function Body */ - if ((*m == 0) || (*n == 0)) { - return 0; - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * b_dim1; - rwork[(j - 1) * *m + i__] = b[i__3].r; -/* L10: */ - } -/* L20: */ - } - - l = *m * *n + 1; - sgemm_("N", "N", m, n, m, &c_b1011, &a[a_offset], lda, &rwork[1], m, & - c_b320, &rwork[l], m); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = l + (j - 1) * *m + i__ - 1; - c__[i__3].r = rwork[i__4], c__[i__3].i = 0.f; -/* L30: */ - } -/* L40: */ - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - rwork[(j - 1) * *m + i__] = r_imag(&b[i__ + j * b_dim1]); -/* L50: */ - } -/* L60: */ - } - sgemm_("N", "N", m, n, m, &c_b1011, &a[a_offset], lda, &rwork[1], m, & - c_b320, &rwork[l], m); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - r__1 = c__[i__4].r; - i__5 = l + (j - 1) * *m + i__ - 1; - q__1.r = r__1, q__1.i = rwork[i__5]; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L70: */ - } -/* L80: */ - } - - return 0; - -/* End of CLARCM */ - -} /* clarcm_ */ - -/* Subroutine */ int clarf_(char *side, integer *m, integer *n, complex *v, - integer *incv, complex *tau, complex *c__, integer *ldc, complex * - work) -{ - /* System generated locals */ - integer c_dim1, c_offset; - complex q__1; - - /* Local variables */ - extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, - complex *, integer *, complex *, integer *, complex *, integer *), - cgemv_(char *, integer *, integer *, complex *, complex *, - integer *, 
complex *, integer *, complex *, complex *, integer *); - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLARF applies a complex elementary reflector H to a complex M-by-N - matrix C, from either the left or the right. H is represented in the - form - - H = I - tau * v * v' - - where tau is a complex scalar and v is a complex vector. - - If tau = 0, then H is taken to be the unit matrix. - - To apply H' (the conjugate transpose of H), supply conjg(tau) instead - tau. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - V (input) COMPLEX array, dimension - (1 + (M-1)*abs(INCV)) if SIDE = 'L' - or (1 + (N-1)*abs(INCV)) if SIDE = 'R' - The vector v in the representation of H. V is not used if - TAU = 0. - - INCV (input) INTEGER - The increment between elements of v. INCV <> 0. - - TAU (input) COMPLEX - The value tau in the representation of H. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). 
- - WORK (workspace) COMPLEX array, dimension - (N) if SIDE = 'L' - or (M) if SIDE = 'R' - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (lsame_(side, "L")) { - -/* Form H * C */ - - if ((tau->r != 0.f) || (tau->i != 0.f)) { - -/* w := C' * v */ - - cgemv_("Conjugate transpose", m, n, &c_b56, &c__[c_offset], ldc, & - v[1], incv, &c_b55, &work[1], &c__1); - -/* C := C - v * w' */ - - q__1.r = -tau->r, q__1.i = -tau->i; - cgerc_(m, n, &q__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], - ldc); - } - } else { - -/* Form C * H */ - - if ((tau->r != 0.f) || (tau->i != 0.f)) { - -/* w := C * v */ - - cgemv_("No transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1], - incv, &c_b55, &work[1], &c__1); - -/* C := C - w * v' */ - - q__1.r = -tau->r, q__1.i = -tau->i; - cgerc_(m, n, &q__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], - ldc); - } - } - return 0; - -/* End of CLARF */ - -} /* clarf_ */ - -/* Subroutine */ int clarfb_(char *side, char *trans, char *direct, char * - storev, integer *m, integer *n, integer *k, complex *v, integer *ldv, - complex *t, integer *ldt, complex *c__, integer *ldc, complex *work, - integer *ldwork) -{ - /* System generated locals */ - integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, - work_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1, q__2; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, - complex *, integer *), ctrmm_(char *, char *, char *, char *, - integer *, integer *, complex *, 
complex *, integer *, complex *, - integer *), clacgv_(integer *, - complex *, integer *); - static char transt[1]; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLARFB applies a complex block reflector H or its transpose H' to a - complex M-by-N matrix C, from either the left or the right. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply H or H' from the Left - = 'R': apply H or H' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply H (No transpose) - = 'C': apply H' (Conjugate transpose) - - DIRECT (input) CHARACTER*1 - Indicates how H is formed from a product of elementary - reflectors - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Indicates how the vectors which define the elementary - reflectors are stored: - = 'C': Columnwise - = 'R': Rowwise - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - K (input) INTEGER - The order of the matrix T (= the number of elementary - reflectors whose product defines the block reflector). - - V (input) COMPLEX array, dimension - (LDV,K) if STOREV = 'C' - (LDV,M) if STOREV = 'R' and SIDE = 'L' - (LDV,N) if STOREV = 'R' and SIDE = 'R' - The matrix V. See further details. - - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); - if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); - if STOREV = 'R', LDV >= K. - - T (input) COMPLEX array, dimension (LDT,K) - The triangular K-by-K matrix T in the representation of the - block reflector. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. 
- On exit, C is overwritten by H*C or H'*C or C*H or C*H'. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) COMPLEX array, dimension (LDWORK,K) - - LDWORK (input) INTEGER - The leading dimension of the array WORK. - If SIDE = 'L', LDWORK >= max(1,N); - if SIDE = 'R', LDWORK >= max(1,M). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - work_dim1 = *ldwork; - work_offset = 1 + work_dim1; - work -= work_offset; - - /* Function Body */ - if ((*m <= 0) || (*n <= 0)) { - return 0; - } - - if (lsame_(trans, "N")) { - *(unsigned char *)transt = 'C'; - } else { - *(unsigned char *)transt = 'N'; - } - - if (lsame_(storev, "C")) { - - if (lsame_(direct, "F")) { - -/* - Let V = ( V1 ) (first K rows) - ( V2 ) - where V1 is unit lower triangular. 
-*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) - - W := C1' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); - clacgv_(n, &work[j * work_dim1 + 1], &c__1); -/* L10: */ - } - -/* W := W * V1 */ - - ctrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b56, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { - -/* W := W + C2'*V2 */ - - i__1 = *m - *k; - cgemm_("Conjugate transpose", "No transpose", n, k, &i__1, - &c_b56, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + - v_dim1], ldv, &c_b56, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - ctrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V * W' */ - - if (*m > *k) { - -/* C2 := C2 - V2 * W' */ - - i__1 = *m - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", &i__1, n, k, - &q__1, &v[*k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork, &c_b56, &c__[*k + 1 + - c_dim1], ldc); - } - -/* W := W * V1' */ - - ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, - &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + i__ * c_dim1; - i__4 = j + i__ * c_dim1; - r_cnjg(&q__2, &work[i__ + j * work_dim1]); - q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - - q__2.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L20: */ - } -/* L30: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V = (C1*V1 + C2*V2) (stored in WORK) - - W := C1 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - work_dim1 + 1], &c__1); -/* L40: */ - } - -/* W := W * V1 */ - - ctrmm_("Right", "Lower", "No 
transpose", "Unit", m, k, &c_b56, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { - -/* W := W + C2 * V2 */ - - i__1 = *n - *k; - cgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b56, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + - 1 + v_dim1], ldv, &c_b56, &work[work_offset], - ldwork); - } - -/* W := W * T or W * T' */ - - ctrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V' */ - - if (*n > *k) { - -/* C2 := C2 - W * V2' */ - - i__1 = *n - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", m, &i__1, k, - &q__1, &work[work_offset], ldwork, &v[*k + 1 + - v_dim1], ldv, &c_b56, &c__[(*k + 1) * c_dim1 + 1], - ldc); - } - -/* W := W * V1' */ - - ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, - &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * work_dim1; - q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ - i__4].i - work[i__5].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L50: */ - } -/* L60: */ - } - } - - } else { - -/* - Let V = ( V1 ) - ( V2 ) (last K rows) - where V2 is unit upper triangular. 
-*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) - - W := C2' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); - clacgv_(n, &work[j * work_dim1 + 1], &c__1); -/* L70: */ - } - -/* W := W * V2 */ - - ctrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b56, - &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*m > *k) { - -/* W := W + C1'*V1 */ - - i__1 = *m - *k; - cgemm_("Conjugate transpose", "No transpose", n, k, &i__1, - &c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b56, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - ctrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V * W' */ - - if (*m > *k) { - -/* C1 := C1 - V1 * W' */ - - i__1 = *m - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", &i__1, n, k, - &q__1, &v[v_offset], ldv, &work[work_offset], - ldwork, &c_b56, &c__[c_offset], ldc); - } - -/* W := W * V2' */ - - ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, - &c_b56, &v[*m - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); - -/* C2 := C2 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = *m - *k + j + i__ * c_dim1; - i__4 = *m - *k + j + i__ * c_dim1; - r_cnjg(&q__2, &work[i__ + j * work_dim1]); - q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - - q__2.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L80: */ - } -/* L90: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V = (C1*V1 + C2*V2) (stored in WORK) - - W := C2 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); -/* L100: */ - } - -/* W := 
W * V2 */ - - ctrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b56, - &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*n > *k) { - -/* W := W + C1 * V1 */ - - i__1 = *n - *k; - cgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b56, &work[work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - ctrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V' */ - - if (*n > *k) { - -/* C1 := C1 - W * V1' */ - - i__1 = *n - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", m, &i__1, k, - &q__1, &work[work_offset], ldwork, &v[v_offset], - ldv, &c_b56, &c__[c_offset], ldc); - } - -/* W := W * V2' */ - - ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, - &c_b56, &v[*n - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); - -/* C2 := C2 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + (*n - *k + j) * c_dim1; - i__4 = i__ + (*n - *k + j) * c_dim1; - i__5 = i__ + j * work_dim1; - q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ - i__4].i - work[i__5].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L110: */ - } -/* L120: */ - } - } - } - - } else if (lsame_(storev, "R")) { - - if (lsame_(direct, "F")) { - -/* - Let V = ( V1 V2 ) (V1: first K columns) - where V1 is unit upper triangular. 
-*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - - W := C1' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); - clacgv_(n, &work[j * work_dim1 + 1], &c__1); -/* L130: */ - } - -/* W := W * V1' */ - - ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, - &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { - -/* W := W + C2'*V2' */ - - i__1 = *m - *k; - cgemm_("Conjugate transpose", "Conjugate transpose", n, k, - &i__1, &c_b56, &c__[*k + 1 + c_dim1], ldc, &v[(* - k + 1) * v_dim1 + 1], ldv, &c_b56, &work[ - work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - ctrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V' * W' */ - - if (*m > *k) { - -/* C2 := C2 - V2' * W' */ - - i__1 = *m - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("Conjugate transpose", "Conjugate transpose", & - i__1, n, k, &q__1, &v[(*k + 1) * v_dim1 + 1], ldv, - &work[work_offset], ldwork, &c_b56, &c__[*k + 1 - + c_dim1], ldc); - } - -/* W := W * V1 */ - - ctrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b56, - &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + i__ * c_dim1; - i__4 = j + i__ * c_dim1; - r_cnjg(&q__2, &work[i__ + j * work_dim1]); - q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - - q__2.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L140: */ - } -/* L150: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - - W := C1 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - work_dim1 + 1], &c__1); -/* L160: */ - } - -/* W := 
W * V1' */ - - ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, - &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { - -/* W := W + C2 * V2' */ - - i__1 = *n - *k; - cgemm_("No transpose", "Conjugate transpose", m, k, &i__1, - &c_b56, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k - + 1) * v_dim1 + 1], ldv, &c_b56, &work[ - work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - ctrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V */ - - if (*n > *k) { - -/* C2 := C2 - W * V2 */ - - i__1 = *n - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "No transpose", m, &i__1, k, &q__1, - &work[work_offset], ldwork, &v[(*k + 1) * v_dim1 - + 1], ldv, &c_b56, &c__[(*k + 1) * c_dim1 + 1], - ldc); - } - -/* W := W * V1 */ - - ctrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b56, - &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * c_dim1; - i__4 = i__ + j * c_dim1; - i__5 = i__ + j * work_dim1; - q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ - i__4].i - work[i__5].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L170: */ - } -/* L180: */ - } - - } - - } else { - -/* - Let V = ( V1 V2 ) (V2: last K columns) - where V2 is unit lower triangular. 
-*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - - W := C2' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); - clacgv_(n, &work[j * work_dim1 + 1], &c__1); -/* L190: */ - } - -/* W := W * V2' */ - - ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, - &c_b56, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*m > *k) { - -/* W := W + C1'*V1' */ - - i__1 = *m - *k; - cgemm_("Conjugate transpose", "Conjugate transpose", n, k, - &i__1, &c_b56, &c__[c_offset], ldc, &v[v_offset], - ldv, &c_b56, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - ctrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V' * W' */ - - if (*m > *k) { - -/* C1 := C1 - V1' * W' */ - - i__1 = *m - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("Conjugate transpose", "Conjugate transpose", & - i__1, n, k, &q__1, &v[v_offset], ldv, &work[ - work_offset], ldwork, &c_b56, &c__[c_offset], ldc); - } - -/* W := W * V2 */ - - ctrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b56, - &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - -/* C2 := C2 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = *m - *k + j + i__ * c_dim1; - i__4 = *m - *k + j + i__ * c_dim1; - r_cnjg(&q__2, &work[i__ + j * work_dim1]); - q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - - q__2.i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L200: */ - } -/* L210: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - - W := C2 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - ccopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 
1], &c__1); -/* L220: */ - } - -/* W := W * V2' */ - - ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, - &c_b56, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*n > *k) { - -/* W := W + C1 * V1' */ - - i__1 = *n - *k; - cgemm_("No transpose", "Conjugate transpose", m, k, &i__1, - &c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b56, &work[work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - ctrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V */ - - if (*n > *k) { - -/* C1 := C1 - W * V1 */ - - i__1 = *n - *k; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "No transpose", m, &i__1, k, &q__1, - &work[work_offset], ldwork, &v[v_offset], ldv, & - c_b56, &c__[c_offset], ldc); - } - -/* W := W * V2 */ - - ctrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b56, - &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + (*n - *k + j) * c_dim1; - i__4 = i__ + (*n - *k + j) * c_dim1; - i__5 = i__ + j * work_dim1; - q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ - i__4].i - work[i__5].i; - c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; -/* L230: */ - } -/* L240: */ - } - - } - - } - } - - return 0; - -/* End of CLARFB */ - -} /* clarfb_ */ - -/* Subroutine */ int clarfg_(integer *n, complex *alpha, complex *x, integer * - incx, complex *tau) -{ - /* System generated locals */ - integer i__1; - real r__1, r__2; - complex q__1, q__2; - - /* Builtin functions */ - double r_imag(complex *), r_sign(real *, real *); - - /* Local variables */ - static integer j, knt; - static real beta; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *); - static real alphi, alphr, xnorm; - extern doublereal scnrm2_(integer *, complex *, integer *), slapy3_(real * - , real *, 
real *); - extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *); - static real safmin, rsafmn; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLARFG generates a complex elementary reflector H of order n, such - that - - H' * ( alpha ) = ( beta ), H' * H = I. - ( x ) ( 0 ) - - where alpha and beta are scalars, with beta real, and x is an - (n-1)-element complex vector. H is represented in the form - - H = I - tau * ( 1 ) * ( 1 v' ) , - ( v ) - - where tau is a complex scalar and v is a complex (n-1)-element - vector. Note that H is not hermitian. - - If the elements of x are all zero and alpha is real, then tau = 0 - and H is taken to be the unit matrix. - - Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . - - Arguments - ========= - - N (input) INTEGER - The order of the elementary reflector. - - ALPHA (input/output) COMPLEX - On entry, the value alpha. - On exit, it is overwritten with the value beta. - - X (input/output) COMPLEX array, dimension - (1+(N-2)*abs(INCX)) - On entry, the vector x. - On exit, it is overwritten with the vector v. - - INCX (input) INTEGER - The increment between elements of X. INCX > 0. - - TAU (output) COMPLEX - The value tau. 
- - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n <= 0) { - tau->r = 0.f, tau->i = 0.f; - return 0; - } - - i__1 = *n - 1; - xnorm = scnrm2_(&i__1, &x[1], incx); - alphr = alpha->r; - alphi = r_imag(alpha); - - if (xnorm == 0.f && alphi == 0.f) { - -/* H = I */ - - tau->r = 0.f, tau->i = 0.f; - } else { - -/* general case */ - - r__1 = slapy3_(&alphr, &alphi, &xnorm); - beta = -r_sign(&r__1, &alphr); - safmin = slamch_("S") / slamch_("E"); - rsafmn = 1.f / safmin; - - if (dabs(beta) < safmin) { - -/* XNORM, BETA may be inaccurate; scale X and recompute them */ - - knt = 0; -L10: - ++knt; - i__1 = *n - 1; - csscal_(&i__1, &rsafmn, &x[1], incx); - beta *= rsafmn; - alphi *= rsafmn; - alphr *= rsafmn; - if (dabs(beta) < safmin) { - goto L10; - } - -/* New BETA is at most 1, at least SAFMIN */ - - i__1 = *n - 1; - xnorm = scnrm2_(&i__1, &x[1], incx); - q__1.r = alphr, q__1.i = alphi; - alpha->r = q__1.r, alpha->i = q__1.i; - r__1 = slapy3_(&alphr, &alphi, &xnorm); - beta = -r_sign(&r__1, &alphr); - r__1 = (beta - alphr) / beta; - r__2 = -alphi / beta; - q__1.r = r__1, q__1.i = r__2; - tau->r = q__1.r, tau->i = q__1.i; - q__2.r = alpha->r - beta, q__2.i = alpha->i; - cladiv_(&q__1, &c_b56, &q__2); - alpha->r = q__1.r, alpha->i = q__1.i; - i__1 = *n - 1; - cscal_(&i__1, alpha, &x[1], incx); - -/* If ALPHA is subnormal, it may lose relative accuracy */ - - alpha->r = beta, alpha->i = 0.f; - i__1 = knt; - for (j = 1; j <= i__1; ++j) { - q__1.r = safmin * alpha->r, q__1.i = safmin * alpha->i; - alpha->r = q__1.r, alpha->i = q__1.i; -/* L20: */ - } - } else { - r__1 = (beta - alphr) / beta; - r__2 = -alphi / beta; - q__1.r = r__1, q__1.i = r__2; - tau->r = q__1.r, tau->i = q__1.i; - q__2.r = alpha->r - beta, q__2.i = alpha->i; - cladiv_(&q__1, &c_b56, &q__2); - alpha->r = q__1.r, alpha->i = q__1.i; - i__1 = *n - 1; - cscal_(&i__1, alpha, &x[1], incx); - alpha->r = beta, 
alpha->i = 0.f; - } - } - - return 0; - -/* End of CLARFG */ - -} /* clarfg_ */ - -/* Subroutine */ int clarft_(char *direct, char *storev, integer *n, integer * - k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt) -{ - /* System generated locals */ - integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4; - complex q__1; - - /* Local variables */ - static integer i__, j; - static complex vii; - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, - complex *, integer *, complex *, integer *), clacgv_(integer *, complex *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLARFT forms the triangular factor T of a complex block reflector H - of order n, which is defined as a product of k elementary reflectors. - - If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; - - If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. - - If STOREV = 'C', the vector which defines the elementary reflector - H(i) is stored in the i-th column of the array V, and - - H = I - V * T * V' - - If STOREV = 'R', the vector which defines the elementary reflector - H(i) is stored in the i-th row of the array V, and - - H = I - V' * T * V - - Arguments - ========= - - DIRECT (input) CHARACTER*1 - Specifies the order in which the elementary reflectors are - multiplied to form the block reflector: - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . 
H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Specifies how the vectors which define the elementary - reflectors are stored (see also Further Details): - = 'C': columnwise - = 'R': rowwise - - N (input) INTEGER - The order of the block reflector H. N >= 0. - - K (input) INTEGER - The order of the triangular factor T (= the number of - elementary reflectors). K >= 1. - - V (input/output) COMPLEX array, dimension - (LDV,K) if STOREV = 'C' - (LDV,N) if STOREV = 'R' - The matrix V. See further details. - - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i). - - T (output) COMPLEX array, dimension (LDT,K) - The k by k triangular factor T of the block reflector. - If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is - lower triangular. The rest of the array is not used. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - Further Details - =============== - - The shape of the matrix V and the storage of the vectors which define - the H(i) is best illustrated by the following example with n = 5 and - k = 3. The elements equal to 1 are not stored; the corresponding - array elements are modified but restored on exit. The rest of the - array is not used. 
- - DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': - - V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) - ( v1 1 ) ( 1 v2 v2 v2 ) - ( v1 v2 1 ) ( 1 v3 v3 ) - ( v1 v2 v3 ) - ( v1 v2 v3 ) - - DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': - - V = ( v1 v2 v3 ) V = ( v1 v1 1 ) - ( v1 v2 v3 ) ( v2 v2 v2 1 ) - ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) - ( 1 v3 ) - ( 1 ) - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - --tau; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - - /* Function Body */ - if (*n == 0) { - return 0; - } - - if (lsame_(direct, "F")) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - if (tau[i__2].r == 0.f && tau[i__2].i == 0.f) { - -/* H(i) = I */ - - i__2 = i__; - for (j = 1; j <= i__2; ++j) { - i__3 = j + i__ * t_dim1; - t[i__3].r = 0.f, t[i__3].i = 0.f; -/* L10: */ - } - } else { - -/* general case */ - - i__2 = i__ + i__ * v_dim1; - vii.r = v[i__2].r, vii.i = v[i__2].i; - i__2 = i__ + i__ * v_dim1; - v[i__2].r = 1.f, v[i__2].i = 0.f; - if (lsame_(storev, "C")) { - -/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ - - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - i__4 = i__; - q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; - cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &v[i__ - + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, & - c_b55, &t[i__ * t_dim1 + 1], &c__1); - } else { - -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ - - if (i__ < *n) { - i__2 = *n - i__; - clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); - } - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - i__4 = i__; - q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; - cgemv_("No transpose", &i__2, &i__3, &q__1, &v[i__ * - v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & - c_b55, &t[i__ * t_dim1 + 1], &c__1); - if (i__ < *n) { - i__2 = *n - i__; - clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); 
- } - } - i__2 = i__ + i__ * v_dim1; - v[i__2].r = vii.r, v[i__2].i = vii.i; - -/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ - - i__2 = i__ - 1; - ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ + i__ * t_dim1; - i__3 = i__; - t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; - } -/* L20: */ - } - } else { - for (i__ = *k; i__ >= 1; --i__) { - i__1 = i__; - if (tau[i__1].r == 0.f && tau[i__1].i == 0.f) { - -/* H(i) = I */ - - i__1 = *k; - for (j = i__; j <= i__1; ++j) { - i__2 = j + i__ * t_dim1; - t[i__2].r = 0.f, t[i__2].i = 0.f; -/* L30: */ - } - } else { - -/* general case */ - - if (i__ < *k) { - if (lsame_(storev, "C")) { - i__1 = *n - *k + i__ + i__ * v_dim1; - vii.r = v[i__1].r, vii.i = v[i__1].i; - i__1 = *n - *k + i__ + i__ * v_dim1; - v[i__1].r = 1.f, v[i__1].i = 0.f; - -/* - T(i+1:k,i) := - - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) -*/ - - i__1 = *n - *k + i__; - i__2 = *k - i__; - i__3 = i__; - q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; - cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &v[ - (i__ + 1) * v_dim1 + 1], ldv, &v[i__ * v_dim1 - + 1], &c__1, &c_b55, &t[i__ + 1 + i__ * - t_dim1], &c__1); - i__1 = *n - *k + i__ + i__ * v_dim1; - v[i__1].r = vii.r, v[i__1].i = vii.i; - } else { - i__1 = i__ + (*n - *k + i__) * v_dim1; - vii.r = v[i__1].r, vii.i = v[i__1].i; - i__1 = i__ + (*n - *k + i__) * v_dim1; - v[i__1].r = 1.f, v[i__1].i = 0.f; - -/* - T(i+1:k,i) := - - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' -*/ - - i__1 = *n - *k + i__ - 1; - clacgv_(&i__1, &v[i__ + v_dim1], ldv); - i__1 = *k - i__; - i__2 = *n - *k + i__; - i__3 = i__; - q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; - cgemv_("No transpose", &i__1, &i__2, &q__1, &v[i__ + - 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & - c_b55, &t[i__ + 1 + i__ * t_dim1], &c__1); - i__1 = *n - *k + i__ - 1; - clacgv_(&i__1, &v[i__ + v_dim1], ldv); - i__1 = i__ + (*n - *k + i__) * v_dim1; - v[i__1].r = vii.r, v[i__1].i = vii.i; - 
} - -/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ - - i__1 = *k - i__; - ctrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ - + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * - t_dim1], &c__1) - ; - } - i__1 = i__ + i__ * t_dim1; - i__2 = i__; - t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; - } -/* L40: */ - } - } - return 0; - -/* End of CLARFT */ - -} /* clarft_ */ - -/* Subroutine */ int clarfx_(char *side, integer *m, integer *n, complex *v, - complex *tau, complex *c__, integer *ldc, complex *work) -{ - /* System generated locals */ - integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, - i__9, i__10, i__11; - complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8, q__9, q__10, - q__11, q__12, q__13, q__14, q__15, q__16, q__17, q__18, q__19; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer j; - static complex t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, - v7, v8, v9, t10, v10, sum; - extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, - complex *, integer *, complex *, integer *, complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLARFX applies a complex elementary reflector H to a complex m by n - matrix C, from either the left or the right. H is represented in the - form - - H = I - tau * v * v' - - where tau is a complex scalar and v is a complex vector. - - If tau = 0, then H is taken to be the unit matrix - - This version uses inline code if H has order < 11. 
- - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - V (input) COMPLEX array, dimension (M) if SIDE = 'L' - or (N) if SIDE = 'R' - The vector v in the representation of H. - - TAU (input) COMPLEX - The value tau in the representation of H. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. - - LDC (input) INTEGER - The leading dimension of the array C. LDA >= max(1,M). - - WORK (workspace) COMPLEX array, dimension (N) if SIDE = 'L' - or (M) if SIDE = 'R' - WORK is not referenced if H has order < 11. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (tau->r == 0.f && tau->i == 0.f) { - return 0; - } - if (lsame_(side, "L")) { - -/* Form H * C, where H has order m. 
*/ - - switch (*m) { - case 1: goto L10; - case 2: goto L30; - case 3: goto L50; - case 4: goto L70; - case 5: goto L90; - case 6: goto L110; - case 7: goto L130; - case 8: goto L150; - case 9: goto L170; - case 10: goto L190; - } - -/* - Code for general M - - w := C'*v -*/ - - cgemv_("Conjugate transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1] - , &c__1, &c_b55, &work[1], &c__1); - -/* C := C - tau * v * w' */ - - q__1.r = -tau->r, q__1.i = -tau->i; - cgerc_(m, n, &q__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], - ldc); - goto L410; -L10: - -/* Special code for 1 x 1 Householder */ - - q__3.r = tau->r * v[1].r - tau->i * v[1].i, q__3.i = tau->r * v[1].i - + tau->i * v[1].r; - r_cnjg(&q__4, &v[1]); - q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i - + q__3.i * q__4.r; - q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; - t1.r = q__1.r, t1.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, q__1.i = t1.r * - c__[i__3].i + t1.i * c__[i__3].r; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L20: */ - } - goto L410; -L30: - -/* Special code for 2 x 2 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__2.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__3.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - sum.r = q__1.r, sum.i 
= q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L40: */ - } - goto L410; -L50: - -/* Special code for 3 x 3 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__3.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__4.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i; - i__4 = j * c_dim1 + 3; - q__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__5.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = 
j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L60: */ - } - goto L410; -L70: - -/* Special code for 4 x 4 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__4.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__5.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__3.r = q__4.r + q__5.r, q__3.i = q__4.i + q__5.i; - i__4 = j * c_dim1 + 3; - q__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__6.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__2.r = q__3.r + q__6.r, q__2.i = q__3.i + q__6.i; - i__5 = j * c_dim1 + 4; - q__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__7.i = v4.r * - 
c__[i__5].i + v4.i * c__[i__5].r; - q__1.r = q__2.r + q__7.r, q__1.i = q__2.i + q__7.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L80: */ - } - goto L410; -L90: - -/* Special code for 5 x 5 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - 
v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__5.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__6.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__4.r = q__5.r + q__6.r, q__4.i = q__5.i + q__6.i; - i__4 = j * c_dim1 + 3; - q__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__7.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i; - i__5 = j * c_dim1 + 4; - q__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__8.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__2.r = q__3.r + q__8.r, q__2.i = q__3.i + q__8.i; - i__6 = j * c_dim1 + 5; - q__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__9.i = v5.r * - c__[i__6].i + v5.i * c__[i__6].r; - q__1.r = q__2.r + q__9.r, q__1.i = q__2.i + q__9.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, 
q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L100: */ - } - goto L410; -L110: - -/* Special code for 6 x 6 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__6.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__7.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__5.r = q__6.r + q__7.r, q__5.i = q__6.i + q__7.i; - i__4 = j * c_dim1 + 3; - 
q__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__8.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__4.r = q__5.r + q__8.r, q__4.i = q__5.i + q__8.i; - i__5 = j * c_dim1 + 4; - q__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__9.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__3.r = q__4.r + q__9.r, q__3.i = q__4.i + q__9.i; - i__6 = j * c_dim1 + 5; - q__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__10.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__2.r = q__3.r + q__10.r, q__2.i = q__3.i + q__10.i; - i__7 = j * c_dim1 + 6; - q__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__11.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__1.r = q__2.r + q__11.r, q__1.i = q__2.i + q__11.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - 
q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L120: */ - } - goto L410; -L130: - -/* Special code for 7 x 7 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - r_cnjg(&q__1, &v[7]); - v7.r = q__1.r, v7.i = q__1.i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__7.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__8.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__6.r = 
q__7.r + q__8.r, q__6.i = q__7.i + q__8.i; - i__4 = j * c_dim1 + 3; - q__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__9.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__5.r = q__6.r + q__9.r, q__5.i = q__6.i + q__9.i; - i__5 = j * c_dim1 + 4; - q__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__10.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__4.r = q__5.r + q__10.r, q__4.i = q__5.i + q__10.i; - i__6 = j * c_dim1 + 5; - q__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__11.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__3.r = q__4.r + q__11.r, q__3.i = q__4.i + q__11.i; - i__7 = j * c_dim1 + 6; - q__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__12.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__2.r = q__3.r + q__12.r, q__2.i = q__3.i + q__12.i; - i__8 = j * c_dim1 + 7; - q__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__13.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__1.r = q__2.r + q__13.r, q__1.i = q__2.i + q__13.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 
= j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L140: */ - } - goto L410; -L150: - -/* Special code for 8 x 8 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - r_cnjg(&q__1, 
&v[7]); - v7.r = q__1.r, v7.i = q__1.i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - r_cnjg(&q__1, &v[8]); - v8.r = q__1.r, v8.i = q__1.i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__8.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__9.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__7.r = q__8.r + q__9.r, q__7.i = q__8.i + q__9.i; - i__4 = j * c_dim1 + 3; - q__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__10.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__6.r = q__7.r + q__10.r, q__6.i = q__7.i + q__10.i; - i__5 = j * c_dim1 + 4; - q__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__11.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__5.r = q__6.r + q__11.r, q__5.i = q__6.i + q__11.i; - i__6 = j * c_dim1 + 5; - q__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__12.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__4.r = q__5.r + q__12.r, q__4.i = q__5.i + q__12.i; - i__7 = j * c_dim1 + 6; - q__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__13.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__3.r = q__4.r + q__13.r, q__3.i = q__4.i + q__13.i; - i__8 = j * c_dim1 + 7; - q__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__14.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__2.r = q__3.r + q__14.r, q__2.i = q__3.i + q__14.i; - i__9 = j * c_dim1 + 8; - q__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__15.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__1.r = q__2.r + q__15.r, q__1.i = q__2.i + q__15.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = 
sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L160: */ - } - goto L410; -L170: - -/* Special code for 9 x 9 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r 
* q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - r_cnjg(&q__1, &v[7]); - v7.r = q__1.r, v7.i = q__1.i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - r_cnjg(&q__1, &v[8]); - v8.r = q__1.r, v8.i = q__1.i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - r_cnjg(&q__1, &v[9]); - v9.r = q__1.r, v9.i = q__1.i; - r_cnjg(&q__2, &v9); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t9.r = q__1.r, t9.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__9.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__10.i = v2.r - * 
c__[i__3].i + v2.i * c__[i__3].r; - q__8.r = q__9.r + q__10.r, q__8.i = q__9.i + q__10.i; - i__4 = j * c_dim1 + 3; - q__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__11.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__7.r = q__8.r + q__11.r, q__7.i = q__8.i + q__11.i; - i__5 = j * c_dim1 + 4; - q__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__12.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__6.r = q__7.r + q__12.r, q__6.i = q__7.i + q__12.i; - i__6 = j * c_dim1 + 5; - q__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__13.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__5.r = q__6.r + q__13.r, q__5.i = q__6.i + q__13.i; - i__7 = j * c_dim1 + 6; - q__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__14.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__4.r = q__5.r + q__14.r, q__4.i = q__5.i + q__14.i; - i__8 = j * c_dim1 + 7; - q__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__15.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__3.r = q__4.r + q__15.r, q__3.i = q__4.i + q__15.i; - i__9 = j * c_dim1 + 8; - q__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__16.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__2.r = q__3.r + q__16.r, q__2.i = q__3.i + q__16.i; - i__10 = j * c_dim1 + 9; - q__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__17.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - q__1.r = q__2.r + q__17.r, q__1.i = q__2.i + q__17.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i 
* t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 9; - i__3 = j * c_dim1 + 9; - q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + - sum.i * t9.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L180: */ - } - goto L410; -L190: - -/* Special code for 10 x 10 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i 
* q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - r_cnjg(&q__1, &v[7]); - v7.r = q__1.r, v7.i = q__1.i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - r_cnjg(&q__1, &v[8]); - v8.r = q__1.r, v8.i = q__1.i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - r_cnjg(&q__1, &v[9]); - v9.r = q__1.r, v9.i = q__1.i; - r_cnjg(&q__2, &v9); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t9.r = q__1.r, t9.i = q__1.i; - r_cnjg(&q__1, &v[10]); - v10.r = q__1.r, v10.i = q__1.i; - r_cnjg(&q__2, &v10); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t10.r = q__1.r, t10.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__10.i = v1.r - * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__11.i = v2.r - * c__[i__3].i + 
v2.i * c__[i__3].r; - q__9.r = q__10.r + q__11.r, q__9.i = q__10.i + q__11.i; - i__4 = j * c_dim1 + 3; - q__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__12.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__8.r = q__9.r + q__12.r, q__8.i = q__9.i + q__12.i; - i__5 = j * c_dim1 + 4; - q__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__13.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__7.r = q__8.r + q__13.r, q__7.i = q__8.i + q__13.i; - i__6 = j * c_dim1 + 5; - q__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__14.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__6.r = q__7.r + q__14.r, q__6.i = q__7.i + q__14.i; - i__7 = j * c_dim1 + 6; - q__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__15.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__5.r = q__6.r + q__15.r, q__5.i = q__6.i + q__15.i; - i__8 = j * c_dim1 + 7; - q__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__16.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__4.r = q__5.r + q__16.r, q__4.i = q__5.i + q__16.i; - i__9 = j * c_dim1 + 8; - q__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__17.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__3.r = q__4.r + q__17.r, q__3.i = q__4.i + q__17.i; - i__10 = j * c_dim1 + 9; - q__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__18.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - q__2.r = q__3.r + q__18.r, q__2.i = q__3.i + q__18.i; - i__11 = j * c_dim1 + 10; - q__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, q__19.i = - v10.r * c__[i__11].i + v10.i * c__[i__11].r; - q__1.r = q__2.r + q__19.r, q__1.i = q__2.i + q__19.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - 
q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 9; - i__3 = j * c_dim1 + 9; - q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + - sum.i * t9.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 10; - i__3 = j * c_dim1 + 10; - q__2.r = sum.r * t10.r - sum.i * t10.i, q__2.i = sum.r * t10.i + - sum.i * t10.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - 
c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L200: */ - } - goto L410; - } else { - -/* Form C * H, where H has order n. */ - - switch (*n) { - case 1: goto L210; - case 2: goto L230; - case 3: goto L250; - case 4: goto L270; - case 5: goto L290; - case 6: goto L310; - case 7: goto L330; - case 8: goto L350; - case 9: goto L370; - case 10: goto L390; - } - -/* - Code for general N - - w := C * v -*/ - - cgemv_("No transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1], & - c__1, &c_b55, &work[1], &c__1); - -/* C := C - tau * w * v' */ - - q__1.r = -tau->r, q__1.i = -tau->i; - cgerc_(m, n, &q__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], - ldc); - goto L410; -L210: - -/* Special code for 1 x 1 Householder */ - - q__3.r = tau->r * v[1].r - tau->i * v[1].i, q__3.i = tau->r * v[1].i - + tau->i * v[1].r; - r_cnjg(&q__4, &v[1]); - q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i - + q__3.i * q__4.r; - q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; - t1.r = q__1.r, t1.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, q__1.i = t1.r * - c__[i__3].i + t1.i * c__[i__3].r; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L220: */ - } - goto L410; -L230: - -/* Special code for 2 x 2 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__2.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); - q__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__3.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - 
q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L240: */ - } - goto L410; -L250: - -/* Special code for 3 x 3 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__3.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); - q__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__4.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i; - i__4 = j + c_dim1 * 3; - q__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__5.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - 
i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L260: */ - } - goto L410; -L270: - -/* Special code for 4 x 4 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__4.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); - q__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__5.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__3.r = q__4.r + q__5.r, q__3.i = q__4.i + q__5.i; - i__4 = j + c_dim1 * 3; - q__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__6.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__2.r = q__3.r + q__6.r, q__2.i = q__3.i + q__6.i; - i__5 = j + ((c_dim1) << (2)); - q__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__7.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__1.r = q__2.r + q__7.r, 
q__1.i = q__2.i + q__7.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L280: */ - } - goto L410; -L290: - -/* Special code for 5 x 5 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, 
t5.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__5.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); - q__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__6.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__4.r = q__5.r + q__6.r, q__4.i = q__5.i + q__6.i; - i__4 = j + c_dim1 * 3; - q__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__7.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i; - i__5 = j + ((c_dim1) << (2)); - q__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__8.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__2.r = q__3.r + q__8.r, q__2.i = q__3.i + q__8.i; - i__6 = j + c_dim1 * 5; - q__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__9.i = v5.r * - c__[i__6].i + v5.i * c__[i__6].r; - q__1.r = q__2.r + q__9.r, q__1.i = q__2.i + q__9.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - 
q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L300: */ - } - goto L410; -L310: - -/* Special code for 6 x 6 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__6.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); - q__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__7.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__5.r = q__6.r + q__7.r, q__5.i = q__6.i + q__7.i; - i__4 = j + c_dim1 * 3; - q__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__8.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__4.r = q__5.r + q__8.r, q__4.i = q__5.i + q__8.i; - i__5 = j + ((c_dim1) << (2)); - q__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__9.i = v4.r * - c__[i__5].i + 
v4.i * c__[i__5].r; - q__3.r = q__4.r + q__9.r, q__3.i = q__4.i + q__9.i; - i__6 = j + c_dim1 * 5; - q__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__10.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__2.r = q__3.r + q__10.r, q__2.i = q__3.i + q__10.i; - i__7 = j + c_dim1 * 6; - q__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__11.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__1.r = q__2.r + q__11.r, q__1.i = q__2.i + q__11.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L320: */ - } - goto L410; -L330: - -/* Special code 
for 7 x 7 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - v7.r = v[7].r, v7.i = v[7].i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__7.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); - q__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__8.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__6.r = q__7.r + q__8.r, q__6.i = q__7.i + q__8.i; - i__4 = j + c_dim1 * 3; - q__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__9.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__5.r = q__6.r + q__9.r, q__5.i = q__6.i + q__9.i; - i__5 = j + ((c_dim1) << (2)); - q__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__10.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__4.r = q__5.r + q__10.r, q__4.i = q__5.i 
+ q__10.i; - i__6 = j + c_dim1 * 5; - q__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__11.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__3.r = q__4.r + q__11.r, q__3.i = q__4.i + q__11.i; - i__7 = j + c_dim1 * 6; - q__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__12.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__2.r = q__3.r + q__12.r, q__2.i = q__3.i + q__12.i; - i__8 = j + c_dim1 * 7; - q__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__13.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__1.r = q__2.r + q__13.r, q__1.i = q__2.i + q__13.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = 
c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L340: */ - } - goto L410; -L350: - -/* Special code for 8 x 8 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - v7.r = v[7].r, v7.i = v[7].i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - v8.r = v[8].r, v8.i = v[8].i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__8.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + 
((c_dim1) << (1)); - q__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__9.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__7.r = q__8.r + q__9.r, q__7.i = q__8.i + q__9.i; - i__4 = j + c_dim1 * 3; - q__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__10.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__6.r = q__7.r + q__10.r, q__6.i = q__7.i + q__10.i; - i__5 = j + ((c_dim1) << (2)); - q__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__11.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__5.r = q__6.r + q__11.r, q__5.i = q__6.i + q__11.i; - i__6 = j + c_dim1 * 5; - q__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__12.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__4.r = q__5.r + q__12.r, q__4.i = q__5.i + q__12.i; - i__7 = j + c_dim1 * 6; - q__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__13.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__3.r = q__4.r + q__13.r, q__3.i = q__4.i + q__13.i; - i__8 = j + c_dim1 * 7; - q__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__14.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__2.r = q__3.r + q__14.r, q__2.i = q__3.i + q__14.i; - i__9 = j + ((c_dim1) << (3)); - q__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__15.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__1.r = q__2.r + q__15.r, q__1.i = q__2.i + q__15.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, 
q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (3)); - i__3 = j + ((c_dim1) << (3)); - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L360: */ - } - goto L410; -L370: - -/* Special code for 9 x 9 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * 
q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - v7.r = v[7].r, v7.i = v[7].i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - v8.r = v[8].r, v8.i = v[8].i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - v9.r = v[9].r, v9.i = v[9].i; - r_cnjg(&q__2, &v9); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t9.r = q__1.r, t9.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__9.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); - q__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__10.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - q__8.r = q__9.r + q__10.r, q__8.i = q__9.i + q__10.i; - i__4 = j + c_dim1 * 3; - q__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__11.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__7.r = q__8.r + q__11.r, q__7.i = q__8.i + q__11.i; - i__5 = j + ((c_dim1) << (2)); - q__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__12.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__6.r = q__7.r + q__12.r, q__6.i = q__7.i + q__12.i; - i__6 = j + c_dim1 * 5; - q__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__13.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__5.r = q__6.r + q__13.r, q__5.i = q__6.i + q__13.i; - i__7 = j + c_dim1 * 6; - q__14.r = v6.r * c__[i__7].r - v6.i * 
c__[i__7].i, q__14.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__4.r = q__5.r + q__14.r, q__4.i = q__5.i + q__14.i; - i__8 = j + c_dim1 * 7; - q__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__15.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__3.r = q__4.r + q__15.r, q__3.i = q__4.i + q__15.i; - i__9 = j + ((c_dim1) << (3)); - q__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__16.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__2.r = q__3.r + q__16.r, q__2.i = q__3.i + q__16.i; - i__10 = j + c_dim1 * 9; - q__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__17.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - q__1.r = q__2.r + q__17.r, q__1.i = q__2.i + q__17.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; 
- q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (3)); - i__3 = j + ((c_dim1) << (3)); - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 9; - i__3 = j + c_dim1 * 9; - q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + - sum.i * t9.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L380: */ - } - goto L410; -L390: - -/* Special code for 10 x 10 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, 
q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - v7.r = v[7].r, v7.i = v[7].i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - v8.r = v[8].r, v8.i = v[8].i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - v9.r = v[9].r, v9.i = v[9].i; - r_cnjg(&q__2, &v9); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t9.r = q__1.r, t9.i = q__1.i; - v10.r = v[10].r, v10.i = v[10].i; - r_cnjg(&q__2, &v10); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t10.r = q__1.r, t10.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__10.i = v1.r - * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); - q__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__11.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - q__9.r = q__10.r + q__11.r, q__9.i = q__10.i + q__11.i; - i__4 = j + c_dim1 * 3; - q__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__12.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__8.r = q__9.r + q__12.r, q__8.i = q__9.i + q__12.i; - i__5 = j + ((c_dim1) << (2)); - q__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__13.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__7.r = q__8.r + q__13.r, q__7.i = q__8.i + q__13.i; - i__6 = j + c_dim1 * 5; - q__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__14.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__6.r = q__7.r + q__14.r, q__6.i = q__7.i + q__14.i; - i__7 = j + c_dim1 * 6; - q__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__15.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__5.r = q__6.r + q__15.r, q__5.i = q__6.i + q__15.i; - i__8 = j + c_dim1 * 7; - q__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, 
q__16.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__4.r = q__5.r + q__16.r, q__4.i = q__5.i + q__16.i; - i__9 = j + ((c_dim1) << (3)); - q__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__17.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__3.r = q__4.r + q__17.r, q__3.i = q__4.i + q__17.i; - i__10 = j + c_dim1 * 9; - q__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__18.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - q__2.r = q__3.r + q__18.r, q__2.i = q__3.i + q__18.i; - i__11 = j + c_dim1 * 10; - q__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, q__19.i = - v10.r * c__[i__11].i + v10.i * c__[i__11].r; - q__1.r = q__2.r + q__19.r, q__1.i = q__2.i + q__19.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - 
q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + ((c_dim1) << (3)); - i__3 = j + ((c_dim1) << (3)); - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 9; - i__3 = j + c_dim1 * 9; - q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + - sum.i * t9.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 10; - i__3 = j + c_dim1 * 10; - q__2.r = sum.r * t10.r - sum.i * t10.i, q__2.i = sum.r * t10.i + - sum.i * t10.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L400: */ - } - goto L410; - } -L410: - return 0; - -/* End of CLARFX */ - -} /* clarfx_ */ - -/* Subroutine */ int clascl_(char *type__, integer *kl, integer *ku, real * - cfrom, real *cto, integer *m, integer *n, complex *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - complex q__1; - - /* Local variables */ - static integer i__, j, k1, k2, k3, k4; - static real mul, cto1; - static logical done; - static real ctoc; - extern logical lsame_(char *, char *); - static integer itype; - static real cfrom1; - extern doublereal slamch_(char *); - static real cfromc; - extern /* Subroutine */ int xerbla_(char *, integer *); - static real bignum, smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - CLASCL multiplies the M by N complex matrix A by the real scalar - CTO/CFROM. This is done without over/underflow as long as the final - result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that - A may be full, upper triangular, lower triangular, upper Hessenberg, - or banded. - - Arguments - ========= - - TYPE (input) CHARACTER*1 - TYPE indices the storage type of the input matrix. - = 'G': A is a full matrix. - = 'L': A is a lower triangular matrix. - = 'U': A is an upper triangular matrix. - = 'H': A is an upper Hessenberg matrix. - = 'B': A is a symmetric band matrix with lower bandwidth KL - and upper bandwidth KU and with the only the lower - half stored. - = 'Q': A is a symmetric band matrix with lower bandwidth KL - and upper bandwidth KU and with the only the upper - half stored. - = 'Z': A is a band matrix with lower bandwidth KL and upper - bandwidth KU. - - KL (input) INTEGER - The lower bandwidth of A. Referenced only if TYPE = 'B', - 'Q' or 'Z'. - - KU (input) INTEGER - The upper bandwidth of A. Referenced only if TYPE = 'B', - 'Q' or 'Z'. - - CFROM (input) REAL - CTO (input) REAL - The matrix A is multiplied by CTO/CFROM. A(I,J) is computed - without over/underflow if the final result CTO*A(I,J)/CFROM - can be represented without over/underflow. CFROM must be - nonzero. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,M) - The matrix to be multiplied by CTO/CFROM. See TYPE for the - storage type. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - INFO (output) INTEGER - 0 - successful exit - <0 - if INFO = -i, the i-th argument had an illegal value. 
- - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - - if (lsame_(type__, "G")) { - itype = 0; - } else if (lsame_(type__, "L")) { - itype = 1; - } else if (lsame_(type__, "U")) { - itype = 2; - } else if (lsame_(type__, "H")) { - itype = 3; - } else if (lsame_(type__, "B")) { - itype = 4; - } else if (lsame_(type__, "Q")) { - itype = 5; - } else if (lsame_(type__, "Z")) { - itype = 6; - } else { - itype = -1; - } - - if (itype == -1) { - *info = -1; - } else if (*cfrom == 0.f) { - *info = -4; - } else if (*m < 0) { - *info = -6; - } else if (((*n < 0) || (itype == 4 && *n != *m)) || (itype == 5 && *n != - *m)) { - *info = -7; - } else if (itype <= 3 && *lda < max(1,*m)) { - *info = -9; - } else if (itype >= 4) { -/* Computing MAX */ - i__1 = *m - 1; - if ((*kl < 0) || (*kl > max(i__1,0))) { - *info = -2; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = *n - 1; - if (((*ku < 0) || (*ku > max(i__1,0))) || (((itype == 4) || ( - itype == 5)) && *kl != *ku)) { - *info = -3; - } else if (((itype == 4 && *lda < *kl + 1) || (itype == 5 && *lda - < *ku + 1)) || (itype == 6 && *lda < ((*kl) << (1)) + *ku - + 1)) { - *info = -9; - } - } - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLASCL", &i__1); - return 0; - } - -/* Quick return if possible */ - - if ((*n == 0) || (*m == 0)) { - return 0; - } - -/* Get machine parameters */ - - smlnum = slamch_("S"); - bignum = 1.f / smlnum; - - cfromc = *cfrom; - ctoc = *cto; - -L10: - cfrom1 = cfromc * smlnum; - cto1 = ctoc / bignum; - if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { - mul = smlnum; - done = FALSE_; - cfromc = cfrom1; - } else if (dabs(cto1) > dabs(cfromc)) { - mul = bignum; - done = FALSE_; - ctoc = cto1; - } else { - mul = ctoc / cfromc; - done = TRUE_; - } - - if (itype == 0) { - -/* Full matrix */ - 
- i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L20: */ - } -/* L30: */ - } - - } else if (itype == 1) { - -/* Lower triangular matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L40: */ - } -/* L50: */ - } - - } else if (itype == 2) { - -/* Upper triangular matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L60: */ - } -/* L70: */ - } - - } else if (itype == 3) { - -/* Upper Hessenberg matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = j + 1; - i__2 = min(i__3,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L80: */ - } -/* L90: */ - } - - } else if (itype == 4) { - -/* Lower half of a symmetric band matrix */ - - k3 = *kl + 1; - k4 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = k3, i__4 = k4 - j; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L100: */ - } -/* L110: */ - } - - } else if (itype == 5) { - -/* Upper half of a symmetric band matrix */ - - k1 = *ku + 2; - k3 = *ku + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__2 = k1 - 
j; - i__3 = k3; - for (i__ = max(i__2,1); i__ <= i__3; ++i__) { - i__2 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L120: */ - } -/* L130: */ - } - - } else if (itype == 6) { - -/* Band matrix */ - - k1 = *kl + *ku + 2; - k2 = *kl + 1; - k3 = ((*kl) << (1)) + *ku + 1; - k4 = *kl + *ku + 1 + *m; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__3 = k1 - j; -/* Computing MIN */ - i__4 = k3, i__5 = k4 - j; - i__2 = min(i__4,i__5); - for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + j * a_dim1; - q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L140: */ - } -/* L150: */ - } - - } - - if (! done) { - goto L10; - } - - return 0; - -/* End of CLASCL */ - -} /* clascl_ */ - -/* Subroutine */ int claset_(char *uplo, integer *m, integer *n, complex * - alpha, complex *beta, complex *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLASET initializes a 2-D array A to BETA on the diagonal and - ALPHA on the offdiagonals. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be set. - = 'U': Upper triangular part is set. The lower triangle - is unchanged. - = 'L': Lower triangular part is set. The upper triangle - is unchanged. - Otherwise: All of the matrix A is set. - - M (input) INTEGER - On entry, M specifies the number of rows of A. - - N (input) INTEGER - On entry, N specifies the number of columns of A. 
- - ALPHA (input) COMPLEX - All the offdiagonal array elements are set to ALPHA. - - BETA (input) COMPLEX - All the diagonal array elements are set to BETA. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; - A(i,i) = BETA , 1 <= i <= min(m,n) - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - -/* - Set the diagonal to BETA and the strictly upper triangular - part of the array to ALPHA. -*/ - - i__1 = *n; - for (j = 2; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = j - 1; - i__2 = min(i__3,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = alpha->r, a[i__3].i = alpha->i; -/* L10: */ - } -/* L20: */ - } - i__1 = min(*n,*m); - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = beta->r, a[i__2].i = beta->i; -/* L30: */ - } - - } else if (lsame_(uplo, "L")) { - -/* - Set the diagonal to BETA and the strictly lower triangular - part of the array to ALPHA. -*/ - - i__1 = min(*m,*n); - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = alpha->r, a[i__3].i = alpha->i; -/* L40: */ - } -/* L50: */ - } - i__1 = min(*n,*m); - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = beta->r, a[i__2].i = beta->i; -/* L60: */ - } - - } else { - -/* - Set the array to BETA on the diagonal and ALPHA on the - offdiagonal. 
-*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = alpha->r, a[i__3].i = alpha->i; -/* L70: */ - } -/* L80: */ - } - i__1 = min(*m,*n); - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - a[i__2].r = beta->r, a[i__2].i = beta->i; -/* L90: */ - } - } - - return 0; - -/* End of CLASET */ - -} /* claset_ */ - -/* Subroutine */ int clasr_(char *side, char *pivot, char *direct, integer *m, - integer *n, real *c__, real *s, complex *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - complex q__1, q__2, q__3; - - /* Local variables */ - static integer i__, j, info; - static complex temp; - extern logical lsame_(char *, char *); - static real ctemp, stemp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - CLASR performs the transformation - - A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) - - A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) - - where A is an m by n complex matrix and P is an orthogonal matrix, - consisting of a sequence of plane rotations determined by the - parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' - and z = n when SIDE = 'R' or 'r' ): - - When DIRECT = 'F' or 'f' ( Forward sequence ) then - - P = P( z - 1 )*...*P( 2 )*P( 1 ), - - and when DIRECT = 'B' or 'b' ( Backward sequence ) then - - P = P( 1 )*P( 2 )*...*P( z - 1 ), - - where P( k ) is a plane rotation matrix for the following planes: - - when PIVOT = 'V' or 'v' ( Variable pivot ), - the plane ( k, k + 1 ) - - when PIVOT = 'T' or 't' ( Top pivot ), - the plane ( 1, k + 1 ) - - when PIVOT = 'B' or 'b' ( Bottom pivot ), - the plane ( k, z ) - - c( k ) and s( k ) must 
contain the cosine and sine that define the - matrix P( k ). The two by two plane rotation part of the matrix - P( k ), R( k ), is assumed to be of the form - - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) - - Arguments - ========= - - SIDE (input) CHARACTER*1 - Specifies whether the plane rotation matrix P is applied to - A on the left or the right. - = 'L': Left, compute A := P*A - = 'R': Right, compute A:= A*P' - - DIRECT (input) CHARACTER*1 - Specifies whether P is a forward or backward sequence of - plane rotations. - = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) - = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) - - PIVOT (input) CHARACTER*1 - Specifies the plane for which P(k) is a plane rotation - matrix. - = 'V': Variable pivot, the plane (k,k+1) - = 'T': Top pivot, the plane (1,k+1) - = 'B': Bottom pivot, the plane (k,z) - - M (input) INTEGER - The number of rows of the matrix A. If m <= 1, an immediate - return is effected. - - N (input) INTEGER - The number of columns of the matrix A. If n <= 1, an - immediate return is effected. - - C, S (input) REAL arrays, dimension - (M-1) if SIDE = 'L' - (N-1) if SIDE = 'R' - c(k) and s(k) contain the cosine and sine that define the - matrix P(k). The two by two plane rotation part of the - matrix P(k), R(k), is assumed to be of the form - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) - - A (input/output) COMPLEX array, dimension (LDA,N) - The m by n matrix A. On exit, A is overwritten by P*A if - SIDE = 'R' or by A*P' if SIDE = 'L'. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - --c__; - --s; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (! ((lsame_(side, "L")) || (lsame_(side, "R")))) { - info = 1; - } else if (! 
(((lsame_(pivot, "V")) || (lsame_( - pivot, "T"))) || (lsame_(pivot, "B")))) { - info = 2; - } else if (! ((lsame_(direct, "F")) || (lsame_( - direct, "B")))) { - info = 3; - } else if (*m < 0) { - info = 4; - } else if (*n < 0) { - info = 5; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("CLASR ", &info); - return 0; - } - -/* Quick return if possible */ - - if ((*m == 0) || (*n == 0)) { - return 0; - } - if (lsame_(side, "L")) { - -/* Form P * A */ - - if (lsame_(pivot, "V")) { - if (lsame_(direct, "F")) { - i__1 = *m - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + 1 + i__ * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = j + 1 + i__ * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__4 = j + i__ * a_dim1; - q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ - i__4].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - i__3 = j + i__ * a_dim1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__4 = j + i__ * a_dim1; - q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ - i__4].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L10: */ - } - } -/* L20: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = j + 1 + i__ * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = j + 1 + i__ * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__3 = j + i__ * a_dim1; - q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ - i__3].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = j + i__ * a_dim1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__3 = j + i__ 
* a_dim1; - q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ - i__3].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L30: */ - } - } -/* L40: */ - } - } - } else if (lsame_(pivot, "T")) { - if (lsame_(direct, "F")) { - i__1 = *m; - for (j = 2; j <= i__1; ++j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if ((ctemp != 1.f) || (stemp != 0.f)) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + i__ * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = j + i__ * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__4 = i__ * a_dim1 + 1; - q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ - i__4].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - i__3 = i__ * a_dim1 + 1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__4 = i__ * a_dim1 + 1; - q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ - i__4].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L50: */ - } - } -/* L60: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m; j >= 2; --j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if ((ctemp != 1.f) || (stemp != 0.f)) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = j + i__ * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = j + i__ * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__3 = i__ * a_dim1 + 1; - q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ - i__3].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = i__ * a_dim1 + 1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__3 = i__ * a_dim1 + 1; - q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ - i__3].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L70: */ - } - } -/* L80: */ - } - } - } else if (lsame_(pivot, "B")) { - if (lsame_(direct, "F")) { - i__1 
= *m - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = j + i__ * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = j + i__ * a_dim1; - i__4 = *m + i__ * a_dim1; - q__2.r = stemp * a[i__4].r, q__2.i = stemp * a[ - i__4].i; - q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - i__3 = *m + i__ * a_dim1; - i__4 = *m + i__ * a_dim1; - q__2.r = ctemp * a[i__4].r, q__2.i = ctemp * a[ - i__4].i; - q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L90: */ - } - } -/* L100: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = j + i__ * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = j + i__ * a_dim1; - i__3 = *m + i__ * a_dim1; - q__2.r = stemp * a[i__3].r, q__2.i = stemp * a[ - i__3].i; - q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = *m + i__ * a_dim1; - i__3 = *m + i__ * a_dim1; - q__2.r = ctemp * a[i__3].r, q__2.i = ctemp * a[ - i__3].i; - q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L110: */ - } - } -/* L120: */ - } - } - } - } else if (lsame_(side, "R")) { - -/* Form A * P' */ - - if (lsame_(pivot, "V")) { - if (lsame_(direct, "F")) { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + (j + 1) * a_dim1; - temp.r = a[i__3].r, 
temp.i = a[i__3].i; - i__3 = i__ + (j + 1) * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__4 = i__ + j * a_dim1; - q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ - i__4].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - i__3 = i__ + j * a_dim1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__4 = i__ + j * a_dim1; - q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ - i__4].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L130: */ - } - } -/* L140: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + (j + 1) * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = i__ + (j + 1) * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__3 = i__ + j * a_dim1; - q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ - i__3].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = i__ + j * a_dim1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__3 = i__ + j * a_dim1; - q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ - i__3].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L150: */ - } - } -/* L160: */ - } - } - } else if (lsame_(pivot, "T")) { - if (lsame_(direct, "F")) { - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if ((ctemp != 1.f) || (stemp != 0.f)) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = i__ + j * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__4 = i__ + a_dim1; - q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ - i__4].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__3].r = q__1.r, 
a[i__3].i = q__1.i; - i__3 = i__ + a_dim1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__4 = i__ + a_dim1; - q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ - i__4].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L170: */ - } - } -/* L180: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n; j >= 2; --j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if ((ctemp != 1.f) || (stemp != 0.f)) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = i__ + j * a_dim1; - q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; - i__3 = i__ + a_dim1; - q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ - i__3].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = i__ + a_dim1; - q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; - i__3 = i__ + a_dim1; - q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ - i__3].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L190: */ - } - } -/* L200: */ - } - } - } else if (lsame_(pivot, "B")) { - if (lsame_(direct, "F")) { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - temp.r = a[i__3].r, temp.i = a[i__3].i; - i__3 = i__ + j * a_dim1; - i__4 = i__ + *n * a_dim1; - q__2.r = stemp * a[i__4].r, q__2.i = stemp * a[ - i__4].i; - q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; - i__3 = i__ + *n * a_dim1; - i__4 = i__ + *n * a_dim1; - q__2.r = ctemp * a[i__4].r, q__2.i = ctemp * a[ - i__4].i; - q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__3].r = q__1.r, a[i__3].i = q__1.i; -/* L210: */ 
- } - } -/* L220: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * a_dim1; - temp.r = a[i__2].r, temp.i = a[i__2].i; - i__2 = i__ + j * a_dim1; - i__3 = i__ + *n * a_dim1; - q__2.r = stemp * a[i__3].r, q__2.i = stemp * a[ - i__3].i; - q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = i__ + *n * a_dim1; - i__3 = i__ + *n * a_dim1; - q__2.r = ctemp * a[i__3].r, q__2.i = ctemp * a[ - i__3].i; - q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - - q__3.i; - a[i__2].r = q__1.r, a[i__2].i = q__1.i; -/* L230: */ - } - } -/* L240: */ - } - } - } - } - - return 0; - -/* End of CLASR */ - -} /* clasr_ */ - -/* Subroutine */ int classq_(integer *n, complex *x, integer *incx, real * - scale, real *sumsq) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - real r__1; - - /* Builtin functions */ - double r_imag(complex *); - - /* Local variables */ - static integer ix; - static real temp1; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CLASSQ returns the values scl and ssq such that - - ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, - - where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is - assumed to be at least unity and the value of ssq will then satisfy - - 1.0 .le. ssq .le. ( sumsq + 2*n ). - - scale is assumed to be non-negative and scl returns the value - - scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), - i - - scale and sumsq must be supplied in SCALE and SUMSQ respectively. 
- SCALE and SUMSQ are overwritten by scl and ssq respectively. - - The routine makes only one pass through the vector X. - - Arguments - ========= - - N (input) INTEGER - The number of elements to be used from the vector X. - - X (input) COMPLEX array, dimension (N) - The vector x as described above. - x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. - - INCX (input) INTEGER - The increment between successive values of the vector X. - INCX > 0. - - SCALE (input/output) REAL - On entry, the value scale in the equation above. - On exit, SCALE is overwritten with the value scl . - - SUMSQ (input/output) REAL - On entry, the value sumsq in the equation above. - On exit, SUMSQ is overwritten with the value ssq . - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n > 0) { - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - i__3 = ix; - if (x[i__3].r != 0.f) { - i__3 = ix; - temp1 = (r__1 = x[i__3].r, dabs(r__1)); - if (*scale < temp1) { -/* Computing 2nd power */ - r__1 = *scale / temp1; - *sumsq = *sumsq * (r__1 * r__1) + 1; - *scale = temp1; - } else { -/* Computing 2nd power */ - r__1 = temp1 / *scale; - *sumsq += r__1 * r__1; - } - } - if (r_imag(&x[ix]) != 0.f) { - temp1 = (r__1 = r_imag(&x[ix]), dabs(r__1)); - if (*scale < temp1) { -/* Computing 2nd power */ - r__1 = *scale / temp1; - *sumsq = *sumsq * (r__1 * r__1) + 1; - *scale = temp1; - } else { -/* Computing 2nd power */ - r__1 = temp1 / *scale; - *sumsq += r__1 * r__1; - } - } -/* L10: */ - } - } - - return 0; - -/* End of CLASSQ */ - -} /* classq_ */ - -/* Subroutine */ int claswp_(integer *n, complex *a, integer *lda, integer * - k1, integer *k2, integer *ipiv, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; - - /* Local variables */ - static integer i__, j, k, i1, i2, n32, ip, ix, ix0, 
inc; - static complex temp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CLASWP performs a series of row interchanges on the matrix A. - One row interchange is initiated for each of rows K1 through K2 of A. - - Arguments - ========= - - N (input) INTEGER - The number of columns of the matrix A. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the matrix of column dimension N to which the row - interchanges will be applied. - On exit, the permuted matrix. - - LDA (input) INTEGER - The leading dimension of the array A. - - K1 (input) INTEGER - The first element of IPIV for which a row interchange will - be done. - - K2 (input) INTEGER - The last element of IPIV for which a row interchange will - be done. - - IPIV (input) INTEGER array, dimension (M*abs(INCX)) - The vector of pivot indices. Only the elements in positions - K1 through K2 of IPIV are accessed. - IPIV(K) = L implies rows K and L are to be interchanged. - - INCX (input) INTEGER - The increment between successive values of IPIV. If IPIV - is negative, the pivots are applied in reverse order. - - Further Details - =============== - - Modified by - R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA - - ===================================================================== - - - Interchange row I with row IPIV(I) for each of rows K1 through K2. 
-*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - if (*incx > 0) { - ix0 = *k1; - i1 = *k1; - i2 = *k2; - inc = 1; - } else if (*incx < 0) { - ix0 = (1 - *k2) * *incx + 1; - i1 = *k2; - i2 = *k1; - inc = -1; - } else { - return 0; - } - - n32 = (*n / 32) << (5); - if (n32 != 0) { - i__1 = n32; - for (j = 1; j <= i__1; j += 32) { - ix = ix0; - i__2 = i2; - i__3 = inc; - for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) - { - ip = ipiv[ix]; - if (ip != i__) { - i__4 = j + 31; - for (k = j; k <= i__4; ++k) { - i__5 = i__ + k * a_dim1; - temp.r = a[i__5].r, temp.i = a[i__5].i; - i__5 = i__ + k * a_dim1; - i__6 = ip + k * a_dim1; - a[i__5].r = a[i__6].r, a[i__5].i = a[i__6].i; - i__5 = ip + k * a_dim1; - a[i__5].r = temp.r, a[i__5].i = temp.i; -/* L10: */ - } - } - ix += *incx; -/* L20: */ - } -/* L30: */ - } - } - if (n32 != *n) { - ++n32; - ix = ix0; - i__1 = i2; - i__3 = inc; - for (i__ = i1; i__3 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__3) { - ip = ipiv[ix]; - if (ip != i__) { - i__2 = *n; - for (k = n32; k <= i__2; ++k) { - i__4 = i__ + k * a_dim1; - temp.r = a[i__4].r, temp.i = a[i__4].i; - i__4 = i__ + k * a_dim1; - i__5 = ip + k * a_dim1; - a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i; - i__4 = ip + k * a_dim1; - a[i__4].r = temp.r, a[i__4].i = temp.i; -/* L40: */ - } - } - ix += *incx; -/* L50: */ - } - } - - return 0; - -/* End of CLASWP */ - -} /* claswp_ */ - -/* Subroutine */ int clatrd_(char *uplo, integer *n, integer *nb, complex *a, - integer *lda, real *e, complex *tau, complex *w, integer *ldw) -{ - /* System generated locals */ - integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; - real r__1; - complex q__1, q__2, q__3, q__4; - - /* Local variables */ - static integer i__, iw; - static complex alpha; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *); - extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer - *, complex *, integer *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *), chemv_(char *, integer *, complex *, - complex *, integer *, complex *, integer *, complex *, complex *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, - integer *, complex *, integer *), clarfg_(integer *, complex *, - complex *, integer *, complex *), clacgv_(integer *, complex *, - integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLATRD reduces NB rows and columns of a complex Hermitian matrix A to - Hermitian tridiagonal form by a unitary similarity - transformation Q' * A * Q, and returns the matrices V and W which are - needed to apply the transformation to the unreduced part of A. - - If UPLO = 'U', CLATRD reduces the last NB rows and columns of a - matrix, of which the upper triangle is supplied; - if UPLO = 'L', CLATRD reduces the first NB rows and columns of a - matrix, of which the lower triangle is supplied. - - This is an auxiliary routine called by CHETRD. - - Arguments - ========= - - UPLO (input) CHARACTER - Specifies whether the upper or lower triangular part of the - Hermitian matrix A is stored: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. - - NB (input) INTEGER - The number of rows and columns to be reduced. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - n-by-n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n-by-n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. 
- On exit: - if UPLO = 'U', the last NB columns have been reduced to - tridiagonal form, with the diagonal elements overwriting - the diagonal elements of A; the elements above the diagonal - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors; - if UPLO = 'L', the first NB columns have been reduced to - tridiagonal form, with the diagonal elements overwriting - the diagonal elements of A; the elements below the diagonal - with the array TAU, represent the unitary matrix Q as a - product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - E (output) REAL array, dimension (N-1) - If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal - elements of the last NB columns of the reduced matrix; - if UPLO = 'L', E(1:nb) contains the subdiagonal elements of - the first NB columns of the reduced matrix. - - TAU (output) COMPLEX array, dimension (N-1) - The scalar factors of the elementary reflectors, stored in - TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. - See Further Details. - - W (output) COMPLEX array, dimension (LDW,NB) - The n-by-nb matrix W required to update the unreduced part - of A. - - LDW (input) INTEGER - The leading dimension of the array W. LDW >= max(1,N). - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n) H(n-1) . . . H(n-nb+1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), - and tau in TAU(i-1). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(nb). 
- - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a complex scalar, and v is a complex vector with - v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), - and tau in TAU(i). - - The elements of the vectors v together form the n-by-nb matrix V - which is needed, with W, to apply the transformation to the unreduced - part of the matrix, using a Hermitian rank-2k update of the form: - A := A - V*W' - W*V'. - - The contents of A on exit are illustrated by the following examples - with n = 5 and nb = 2: - - if UPLO = 'U': if UPLO = 'L': - - ( a a a v4 v5 ) ( d ) - ( a a v4 v5 ) ( 1 d ) - ( a 1 v5 ) ( v1 1 a ) - ( d 1 ) ( v1 v2 a a ) - ( d ) ( v1 v2 a a a ) - - where d denotes a diagonal element of the reduced matrix, a denotes - an element of the original matrix that is unchanged, and vi denotes - an element of the vector defining H(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --e; - --tau; - w_dim1 = *ldw; - w_offset = 1 + w_dim1; - w -= w_offset; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - - if (lsame_(uplo, "U")) { - -/* Reduce last NB columns of upper triangle */ - - i__1 = *n - *nb + 1; - for (i__ = *n; i__ >= i__1; --i__) { - iw = i__ - *n + *nb; - if (i__ < *n) { - -/* Update A(1:i,i) */ - - i__2 = i__ + i__ * a_dim1; - i__3 = i__ + i__ * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - i__2 = *n - i__; - clacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); - i__2 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__, &i__2, &q__1, &a[(i__ + 1) * - a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & - c_b56, &a[i__ * a_dim1 + 1], &c__1); - i__2 = *n - i__; - clacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = *n - i__; - q__1.r = -1.f, 
q__1.i = -0.f; - cgemv_("No transpose", &i__, &i__2, &q__1, &w[(iw + 1) * - w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b56, &a[i__ * a_dim1 + 1], &c__1); - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - i__3 = i__ + i__ * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - } - if (i__ > 1) { - -/* - Generate elementary reflector H(i) to annihilate - A(1:i-2,i) -*/ - - i__2 = i__ - 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = i__ - 1; - clarfg_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &tau[i__ - - 1]); - i__2 = i__ - 1; - e[i__2] = alpha.r; - i__2 = i__ - 1 + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute W(1:i-1,i) */ - - i__2 = i__ - 1; - chemv_("Upper", &i__2, &c_b56, &a[a_offset], lda, &a[i__ * - a_dim1 + 1], &c__1, &c_b55, &w[iw * w_dim1 + 1], & - c__1); - if (i__ < *n) { - i__2 = i__ - 1; - i__3 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &w[( - iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], - &c__1, &c_b55, &w[i__ + 1 + iw * w_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[(i__ + 1) * - a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b56, &w[iw * w_dim1 + 1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[( - i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], - &c__1, &c_b55, &w[i__ + 1 + iw * w_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &w[(iw + 1) * - w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b56, &w[iw * w_dim1 + 1], &c__1); - } - i__2 = i__ - 1; - cscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); - q__3.r = -.5f, q__3.i = -0.f; - i__2 = i__ - 1; - q__2.r = q__3.r * tau[i__2].r - q__3.i * tau[i__2].i, q__2.i = - q__3.r * tau[i__2].i + q__3.i * 
tau[i__2].r; - i__3 = i__ - 1; - cdotc_(&q__4, &i__3, &w[iw * w_dim1 + 1], &c__1, &a[i__ * - a_dim1 + 1], &c__1); - q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * - q__4.i + q__2.i * q__4.r; - alpha.r = q__1.r, alpha.i = q__1.i; - i__2 = i__ - 1; - caxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * - w_dim1 + 1], &c__1); - } - -/* L10: */ - } - } else { - -/* Reduce first NB columns of lower triangle */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i:n,i) */ - - i__2 = i__ + i__ * a_dim1; - i__3 = i__ + i__ * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - i__2 = i__ - 1; - clacgv_(&i__2, &w[i__ + w_dim1], ldw); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda, - &w[i__ + w_dim1], ldw, &c_b56, &a[i__ + i__ * a_dim1], & - c__1); - i__2 = i__ - 1; - clacgv_(&i__2, &w[i__ + w_dim1], ldw); - i__2 = i__ - 1; - clacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + w_dim1], ldw, - &a[i__ + a_dim1], lda, &c_b56, &a[i__ + i__ * a_dim1], & - c__1); - i__2 = i__ - 1; - clacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = i__ + i__ * a_dim1; - i__3 = i__ + i__ * a_dim1; - r__1 = a[i__3].r; - a[i__2].r = r__1, a[i__2].i = 0.f; - if (i__ < *n) { - -/* - Generate elementary reflector H(i) to annihilate - A(i+2:n,i) -*/ - - i__2 = i__ + 1 + i__ * a_dim1; - alpha.r = a[i__2].r, alpha.i = a[i__2].i; - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, - &tau[i__]); - i__2 = i__; - e[i__2] = alpha.r; - i__2 = i__ + 1 + i__ * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - -/* Compute W(i+1:n,i) */ - - i__2 = *n - i__; - chemv_("Lower", &i__2, &c_b56, &a[i__ + 1 + (i__ + 1) * - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &w[i__ + 1 + i__ * 
w_dim1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &w[i__ + - 1 + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &w[i__ * w_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + - a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b56, &w[ - i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + - 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &w[i__ * w_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + 1 + - w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b56, &w[ - i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - cscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); - q__3.r = -.5f, q__3.i = -0.f; - i__2 = i__; - q__2.r = q__3.r * tau[i__2].r - q__3.i * tau[i__2].i, q__2.i = - q__3.r * tau[i__2].i + q__3.i * tau[i__2].r; - i__3 = *n - i__; - cdotc_(&q__4, &i__3, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[ - i__ + 1 + i__ * a_dim1], &c__1); - q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * - q__4.i + q__2.i * q__4.r; - alpha.r = q__1.r, alpha.i = q__1.i; - i__2 = *n - i__; - caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ - i__ + 1 + i__ * w_dim1], &c__1); - } - -/* L20: */ - } - } - - return 0; - -/* End of CLATRD */ - -} /* clatrd_ */ - -/* Subroutine */ int clatrs_(char *uplo, char *trans, char *diag, char * - normin, integer *n, complex *a, integer *lda, complex *x, real *scale, - real *cnorm, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - real r__1, r__2, r__3, r__4; - complex q__1, q__2, q__3, q__4; - - /* Builtin functions */ - double r_imag(complex *); - void r_cnjg(complex *, complex *); - - /* Local variables */ - static 
integer i__, j; - static real xj, rec, tjj; - static integer jinc; - static real xbnd; - static integer imax; - static real tmax; - static complex tjjs; - static real xmax, grow; - extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer - *, complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static real tscal; - static complex uscal; - static integer jlast; - extern /* Complex */ VOID cdotu_(complex *, integer *, complex *, integer - *, complex *, integer *); - static complex csumj; - extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, - integer *, complex *, integer *); - static logical upper; - extern /* Subroutine */ int ctrsv_(char *, char *, char *, integer *, - complex *, integer *, complex *, integer *), slabad_(real *, real *); - extern integer icamax_(integer *, complex *, integer *); - extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *), xerbla_(char *, integer *); - static real bignum; - extern integer isamax_(integer *, real *, integer *); - extern doublereal scasum_(integer *, complex *, integer *); - static logical notran; - static integer jfirst; - static real smlnum; - static logical nounit; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1992 - - - Purpose - ======= - - CLATRS solves one of the triangular systems - - A * x = s*b, A**T * x = s*b, or A**H * x = s*b, - - with scaling to prevent overflow. 
Here A is an upper or lower - triangular matrix, A**T denotes the transpose of A, A**H denotes the - conjugate transpose of A, x and b are n-element vectors, and s is a - scaling factor, usually less than or equal to 1, chosen so that the - components of x will be less than the overflow threshold. If the - unscaled problem will not cause overflow, the Level 2 BLAS routine - CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), - then s is set to 0 and a non-trivial solution to A*x = 0 is returned. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the matrix A is upper or lower triangular. - = 'U': Upper triangular - = 'L': Lower triangular - - TRANS (input) CHARACTER*1 - Specifies the operation applied to A. - = 'N': Solve A * x = s*b (No transpose) - = 'T': Solve A**T * x = s*b (Transpose) - = 'C': Solve A**H * x = s*b (Conjugate transpose) - - DIAG (input) CHARACTER*1 - Specifies whether or not the matrix A is unit triangular. - = 'N': Non-unit triangular - = 'U': Unit triangular - - NORMIN (input) CHARACTER*1 - Specifies whether CNORM has been set or not. - = 'Y': CNORM contains the column norms on entry - = 'N': CNORM is not set on entry. On exit, the norms will - be computed and stored in CNORM. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input) COMPLEX array, dimension (LDA,N) - The triangular matrix A. If UPLO = 'U', the leading n by n - upper triangular part of the array A contains the upper - triangular matrix, and the strictly lower triangular part of - A is not referenced. If UPLO = 'L', the leading n by n lower - triangular part of the array A contains the lower triangular - matrix, and the strictly upper triangular part of A is not - referenced. If DIAG = 'U', the diagonal elements of A are - also not referenced and are assumed to be 1. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max (1,N). 
- - X (input/output) COMPLEX array, dimension (N) - On entry, the right hand side b of the triangular system. - On exit, X is overwritten by the solution vector x. - - SCALE (output) REAL - The scaling factor s for the triangular system - A * x = s*b, A**T * x = s*b, or A**H * x = s*b. - If SCALE = 0, the matrix A is singular or badly scaled, and - the vector x is an exact or approximate solution to A*x = 0. - - CNORM (input or output) REAL array, dimension (N) - - If NORMIN = 'Y', CNORM is an input argument and CNORM(j) - contains the norm of the off-diagonal part of the j-th column - of A. If TRANS = 'N', CNORM(j) must be greater than or equal - to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) - must be greater than or equal to the 1-norm. - - If NORMIN = 'N', CNORM is an output argument and CNORM(j) - returns the 1-norm of the offdiagonal part of the j-th column - of A. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - Further Details - ======= ======= - - A rough bound on x is computed; if that is less than overflow, CTRSV - is called, otherwise, specific code is used which checks for possible - overflow or divide-by-zero at every operation. - - A columnwise scheme is used for solving A*x = b. The basic algorithm - if A is lower triangular is - - x[1:n] := b[1:n] - for j = 1, ..., n - x(j) := x(j) / A(j,j) - x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] - end - - Define bounds on the components of x after j iterations of the loop: - M(j) = bound on x[1:j] - G(j) = bound on x[j+1:n] - Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. - - Then for iteration j+1 we have - M(j+1) <= G(j) / | A(j+1,j+1) | - G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | - <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) - - where CNORM(j+1) is greater than or equal to the infinity-norm of - column j+1 of A, not counting the diagonal. 
Hence - - G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) - 1<=i<=j - and - - |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) - 1<=i< j - - Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTRSV if the - reciprocal of the largest M(j), j=1,..,n, is larger than - max(underflow, 1/overflow). - - The bound on x(j) is also used to determine when a step in the - columnwise method can be performed without fear of overflow. If - the computed bound is greater than a large constant, x is scaled to - prevent overflow, but if the bound overflows, x is set to 0, x(j) to - 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. - - Similarly, a row-wise scheme is used to solve A**T *x = b or - A**H *x = b. The basic algorithm for A upper triangular is - - for j = 1, ..., n - x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) - end - - We simultaneously compute two bounds - G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j - M(j) = bound on x(i), 1<=i<=j - - The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we - add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. - Then the bound on x(j) is - - M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | - - <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) - 1<=i<=j - - and we can safely call CTRSV if 1/M(n) and 1/G(n) are both greater - than max(underflow, 1/overflow). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --x; - --cnorm; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - notran = lsame_(trans, "N"); - nounit = lsame_(diag, "N"); - -/* Test the input parameters. */ - - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T") && ! - lsame_(trans, "C")) { - *info = -2; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -3; - } else if (! lsame_(normin, "Y") && ! 
lsame_(normin, - "N")) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*lda < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLATRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine machine dependent parameters to control overflow. */ - - smlnum = slamch_("Safe minimum"); - bignum = 1.f / smlnum; - slabad_(&smlnum, &bignum); - smlnum /= slamch_("Precision"); - bignum = 1.f / smlnum; - *scale = 1.f; - - if (lsame_(normin, "N")) { - -/* Compute the 1-norm of each column, not including the diagonal. */ - - if (upper) { - -/* A is upper triangular. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - cnorm[j] = scasum_(&i__2, &a[j * a_dim1 + 1], &c__1); -/* L10: */ - } - } else { - -/* A is lower triangular. */ - - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - i__2 = *n - j; - cnorm[j] = scasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1); -/* L20: */ - } - cnorm[*n] = 0.f; - } - } - -/* - Scale the column norms by TSCAL if the maximum element in CNORM is - greater than BIGNUM/2. -*/ - - imax = isamax_(n, &cnorm[1], &c__1); - tmax = cnorm[imax]; - if (tmax <= bignum * .5f) { - tscal = 1.f; - } else { - tscal = .5f / (smlnum * tmax); - sscal_(n, &tscal, &cnorm[1], &c__1); - } - -/* - Compute a bound on the computed solution vector to see if the - Level 2 BLAS routine CTRSV can be used. -*/ - - xmax = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__2 = j; - r__3 = xmax, r__4 = (r__1 = x[i__2].r / 2.f, dabs(r__1)) + (r__2 = - r_imag(&x[j]) / 2.f, dabs(r__2)); - xmax = dmax(r__3,r__4); -/* L30: */ - } - xbnd = xmax; - - if (notran) { - -/* Compute the growth in A * x = b. */ - - if (upper) { - jfirst = *n; - jlast = 1; - jinc = -1; - } else { - jfirst = 1; - jlast = *n; - jinc = 1; - } - - if (tscal != 1.f) { - grow = 0.f; - goto L60; - } - - if (nounit) { - -/* - A is non-unit triangular. 
- - Compute GROW = 1/G(j) and XBND = 1/M(j). - Initially, G(0) = max{x(i), i=1,...,n}. -*/ - - grow = .5f / dmax(xbnd,smlnum); - xbnd = grow; - i__1 = jlast; - i__2 = jinc; - for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* Exit the loop if the growth factor is too small. */ - - if (grow <= smlnum) { - goto L60; - } - - i__3 = j + j * a_dim1; - tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - - if (tjj >= smlnum) { - -/* - M(j) = G(j-1) / abs(A(j,j)) - - Computing MIN -*/ - r__1 = xbnd, r__2 = dmin(1.f,tjj) * grow; - xbnd = dmin(r__1,r__2); - } else { - -/* M(j) could overflow, set XBND to 0. */ - - xbnd = 0.f; - } - - if (tjj + cnorm[j] >= smlnum) { - -/* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ - - grow *= tjj / (tjj + cnorm[j]); - } else { - -/* G(j) could overflow, set GROW to 0. */ - - grow = 0.f; - } -/* L40: */ - } - grow = xbnd; - } else { - -/* - A is unit triangular. - - Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. - - Computing MIN -*/ - r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum); - grow = dmin(r__1,r__2); - i__2 = jlast; - i__1 = jinc; - for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* Exit the loop if the growth factor is too small. */ - - if (grow <= smlnum) { - goto L60; - } - -/* G(j) = G(j-1)*( 1 + CNORM(j) ) */ - - grow *= 1.f / (cnorm[j] + 1.f); -/* L50: */ - } - } -L60: - - ; - } else { - -/* Compute the growth in A**T * x = b or A**H * x = b. */ - - if (upper) { - jfirst = 1; - jlast = *n; - jinc = 1; - } else { - jfirst = *n; - jlast = 1; - jinc = -1; - } - - if (tscal != 1.f) { - grow = 0.f; - goto L90; - } - - if (nounit) { - -/* - A is non-unit triangular. - - Compute GROW = 1/G(j) and XBND = 1/M(j). - Initially, M(0) = max{x(i), i=1,...,n}. -*/ - - grow = .5f / dmax(xbnd,smlnum); - xbnd = grow; - i__1 = jlast; - i__2 = jinc; - for (j = jfirst; i__2 < 0 ? 
j >= i__1 : j <= i__1; j += i__2) { - -/* Exit the loop if the growth factor is too small. */ - - if (grow <= smlnum) { - goto L90; - } - -/* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ - - xj = cnorm[j] + 1.f; -/* Computing MIN */ - r__1 = grow, r__2 = xbnd / xj; - grow = dmin(r__1,r__2); - - i__3 = j + j * a_dim1; - tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - - if (tjj >= smlnum) { - -/* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ - - if (xj > tjj) { - xbnd *= tjj / xj; - } - } else { - -/* M(j) could overflow, set XBND to 0. */ - - xbnd = 0.f; - } -/* L70: */ - } - grow = dmin(grow,xbnd); - } else { - -/* - A is unit triangular. - - Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. - - Computing MIN -*/ - r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum); - grow = dmin(r__1,r__2); - i__2 = jlast; - i__1 = jinc; - for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* Exit the loop if the growth factor is too small. */ - - if (grow <= smlnum) { - goto L90; - } - -/* G(j) = ( 1 + CNORM(j) )*G(j-1) */ - - xj = cnorm[j] + 1.f; - grow /= xj; -/* L80: */ - } - } -L90: - ; - } - - if (grow * tscal > smlnum) { - -/* - Use the Level 2 BLAS solve if the reciprocal of the bound on - elements of X is not too small. -*/ - - ctrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1); - } else { - -/* Use a Level 1 BLAS solve, scaling intermediate results. */ - - if (xmax > bignum * .5f) { - -/* - Scale X so that its components are less than or equal to - BIGNUM in absolute value. -*/ - - *scale = bignum * .5f / xmax; - csscal_(n, scale, &x[1], &c__1); - xmax = bignum; - } else { - xmax *= 2.f; - } - - if (notran) { - -/* Solve A * x = b */ - - i__1 = jlast; - i__2 = jinc; - for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* Compute x(j) = b(j) / A(j,j), scaling x if necessary. 
*/ - - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), - dabs(r__2)); - if (nounit) { - i__3 = j + j * a_dim1; - q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3].i; - tjjs.r = q__1.r, tjjs.i = q__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.f; - if (tscal == 1.f) { - goto L105; - } - } - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - if (tjj > smlnum) { - -/* abs(A(j,j)) > SMLNUM: */ - - if (tjj < 1.f) { - if (xj > tjj * bignum) { - -/* Scale x by 1/b(j). */ - - rec = 1.f / xj; - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - i__3 = j; - cladiv_(&q__1, &x[j], &tjjs); - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] - ), dabs(r__2)); - } else if (tjj > 0.f) { - -/* 0 < abs(A(j,j)) <= SMLNUM: */ - - if (xj > tjj * bignum) { - -/* - Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM - to avoid overflow when dividing by A(j,j). -*/ - - rec = tjj * bignum / xj; - if (cnorm[j] > 1.f) { - -/* - Scale by 1/CNORM(j) to avoid overflow when - multiplying x(j) times column j. -*/ - - rec /= cnorm[j]; - } - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - i__3 = j; - cladiv_(&q__1, &x[j], &tjjs); - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] - ), dabs(r__2)); - } else { - -/* - A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and - scale = 0, and compute a solution to A*x = 0. -*/ - - i__3 = *n; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__; - x[i__4].r = 0.f, x[i__4].i = 0.f; -/* L100: */ - } - i__3 = j; - x[i__3].r = 1.f, x[i__3].i = 0.f; - xj = 1.f; - *scale = 0.f; - xmax = 0.f; - } -L105: - -/* - Scale x if necessary to avoid overflow when adding a - multiple of column j of A. -*/ - - if (xj > 1.f) { - rec = 1.f / xj; - if (cnorm[j] > (bignum - xmax) * rec) { - -/* Scale x by 1/(2*abs(x(j))). 
*/ - - rec *= .5f; - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - } - } else if (xj * cnorm[j] > bignum - xmax) { - -/* Scale x by 1/2. */ - - csscal_(n, &c_b2206, &x[1], &c__1); - *scale *= .5f; - } - - if (upper) { - if (j > 1) { - -/* - Compute the update - x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) -*/ - - i__3 = j - 1; - i__4 = j; - q__2.r = -x[i__4].r, q__2.i = -x[i__4].i; - q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; - caxpy_(&i__3, &q__1, &a[j * a_dim1 + 1], &c__1, &x[1], - &c__1); - i__3 = j - 1; - i__ = icamax_(&i__3, &x[1], &c__1); - i__3 = i__; - xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = - r_imag(&x[i__]), dabs(r__2)); - } - } else { - if (j < *n) { - -/* - Compute the update - x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) -*/ - - i__3 = *n - j; - i__4 = j; - q__2.r = -x[i__4].r, q__2.i = -x[i__4].i; - q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; - caxpy_(&i__3, &q__1, &a[j + 1 + j * a_dim1], &c__1, & - x[j + 1], &c__1); - i__3 = *n - j; - i__ = j + icamax_(&i__3, &x[j + 1], &c__1); - i__3 = i__; - xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = - r_imag(&x[i__]), dabs(r__2)); - } - } -/* L110: */ - } - - } else if (lsame_(trans, "T")) { - -/* Solve A**T * x = b */ - - i__2 = jlast; - i__1 = jinc; - for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* - Compute x(j) = b(j) - sum A(k,j)*x(k). - k<>j -*/ - - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), - dabs(r__2)); - uscal.r = tscal, uscal.i = 0.f; - rec = 1.f / dmax(xmax,1.f); - if (cnorm[j] > (bignum - xj) * rec) { - -/* If x(j) could overflow, scale x by 1/(2*XMAX). */ - - rec *= .5f; - if (nounit) { - i__3 = j + j * a_dim1; - q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3] - .i; - tjjs.r = q__1.r, tjjs.i = q__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.f; - } - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - if (tjj > 1.f) { - -/* - Divide by A(j,j) when scaling x if A(j,j) > 1. 
- - Computing MIN -*/ - r__1 = 1.f, r__2 = rec * tjj; - rec = dmin(r__1,r__2); - cladiv_(&q__1, &uscal, &tjjs); - uscal.r = q__1.r, uscal.i = q__1.i; - } - if (rec < 1.f) { - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - - csumj.r = 0.f, csumj.i = 0.f; - if (uscal.r == 1.f && uscal.i == 0.f) { - -/* - If the scaling needed for A in the dot product is 1, - call CDOTU to perform the dot product. -*/ - - if (upper) { - i__3 = j - 1; - cdotu_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], - &c__1); - csumj.r = q__1.r, csumj.i = q__1.i; - } else if (j < *n) { - i__3 = *n - j; - cdotu_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & - x[j + 1], &c__1); - csumj.r = q__1.r, csumj.i = q__1.i; - } - } else { - -/* Otherwise, use in-line code for the dot product. */ - - if (upper) { - i__3 = j - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * a_dim1; - q__3.r = a[i__4].r * uscal.r - a[i__4].i * - uscal.i, q__3.i = a[i__4].r * uscal.i + a[ - i__4].i * uscal.r; - i__5 = i__; - q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, - q__2.i = q__3.r * x[i__5].i + q__3.i * x[ - i__5].r; - q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + - q__2.i; - csumj.r = q__1.r, csumj.i = q__1.i; -/* L120: */ - } - } else if (j < *n) { - i__3 = *n; - for (i__ = j + 1; i__ <= i__3; ++i__) { - i__4 = i__ + j * a_dim1; - q__3.r = a[i__4].r * uscal.r - a[i__4].i * - uscal.i, q__3.i = a[i__4].r * uscal.i + a[ - i__4].i * uscal.r; - i__5 = i__; - q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, - q__2.i = q__3.r * x[i__5].i + q__3.i * x[ - i__5].r; - q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + - q__2.i; - csumj.r = q__1.r, csumj.i = q__1.i; -/* L130: */ - } - } - } - - q__1.r = tscal, q__1.i = 0.f; - if (uscal.r == q__1.r && uscal.i == q__1.i) { - -/* - Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) - was not used to scale the dotproduct. 
-*/ - - i__3 = j; - i__4 = j; - q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i - - csumj.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] - ), dabs(r__2)); - if (nounit) { - i__3 = j + j * a_dim1; - q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3] - .i; - tjjs.r = q__1.r, tjjs.i = q__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.f; - if (tscal == 1.f) { - goto L145; - } - } - -/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ - - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - if (tjj > smlnum) { - -/* abs(A(j,j)) > SMLNUM: */ - - if (tjj < 1.f) { - if (xj > tjj * bignum) { - -/* Scale X by 1/abs(x(j)). */ - - rec = 1.f / xj; - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - i__3 = j; - cladiv_(&q__1, &x[j], &tjjs); - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } else if (tjj > 0.f) { - -/* 0 < abs(A(j,j)) <= SMLNUM: */ - - if (xj > tjj * bignum) { - -/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ - - rec = tjj * bignum / xj; - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - i__3 = j; - cladiv_(&q__1, &x[j], &tjjs); - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } else { - -/* - A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and - scale = 0 and compute a solution to A**T *x = 0. -*/ - - i__3 = *n; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__; - x[i__4].r = 0.f, x[i__4].i = 0.f; -/* L140: */ - } - i__3 = j; - x[i__3].r = 1.f, x[i__3].i = 0.f; - *scale = 0.f; - xmax = 0.f; - } -L145: - ; - } else { - -/* - Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot - product has already been divided by 1/A(j,j). 
-*/ - - i__3 = j; - cladiv_(&q__2, &x[j], &tjjs); - q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } -/* Computing MAX */ - i__3 = j; - r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = - r_imag(&x[j]), dabs(r__2)); - xmax = dmax(r__3,r__4); -/* L150: */ - } - - } else { - -/* Solve A**H * x = b */ - - i__1 = jlast; - i__2 = jinc; - for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* - Compute x(j) = b(j) - sum A(k,j)*x(k). - k<>j -*/ - - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), - dabs(r__2)); - uscal.r = tscal, uscal.i = 0.f; - rec = 1.f / dmax(xmax,1.f); - if (cnorm[j] > (bignum - xj) * rec) { - -/* If x(j) could overflow, scale x by 1/(2*XMAX). */ - - rec *= .5f; - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; - tjjs.r = q__1.r, tjjs.i = q__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.f; - } - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - if (tjj > 1.f) { - -/* - Divide by A(j,j) when scaling x if A(j,j) > 1. - - Computing MIN -*/ - r__1 = 1.f, r__2 = rec * tjj; - rec = dmin(r__1,r__2); - cladiv_(&q__1, &uscal, &tjjs); - uscal.r = q__1.r, uscal.i = q__1.i; - } - if (rec < 1.f) { - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - - csumj.r = 0.f, csumj.i = 0.f; - if (uscal.r == 1.f && uscal.i == 0.f) { - -/* - If the scaling needed for A in the dot product is 1, - call CDOTC to perform the dot product. -*/ - - if (upper) { - i__3 = j - 1; - cdotc_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], - &c__1); - csumj.r = q__1.r, csumj.i = q__1.i; - } else if (j < *n) { - i__3 = *n - j; - cdotc_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & - x[j + 1], &c__1); - csumj.r = q__1.r, csumj.i = q__1.i; - } - } else { - -/* Otherwise, use in-line code for the dot product. 
*/ - - if (upper) { - i__3 = j - 1; - for (i__ = 1; i__ <= i__3; ++i__) { - r_cnjg(&q__4, &a[i__ + j * a_dim1]); - q__3.r = q__4.r * uscal.r - q__4.i * uscal.i, - q__3.i = q__4.r * uscal.i + q__4.i * - uscal.r; - i__4 = i__; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, - q__2.i = q__3.r * x[i__4].i + q__3.i * x[ - i__4].r; - q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + - q__2.i; - csumj.r = q__1.r, csumj.i = q__1.i; -/* L160: */ - } - } else if (j < *n) { - i__3 = *n; - for (i__ = j + 1; i__ <= i__3; ++i__) { - r_cnjg(&q__4, &a[i__ + j * a_dim1]); - q__3.r = q__4.r * uscal.r - q__4.i * uscal.i, - q__3.i = q__4.r * uscal.i + q__4.i * - uscal.r; - i__4 = i__; - q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, - q__2.i = q__3.r * x[i__4].i + q__3.i * x[ - i__4].r; - q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + - q__2.i; - csumj.r = q__1.r, csumj.i = q__1.i; -/* L170: */ - } - } - } - - q__1.r = tscal, q__1.i = 0.f; - if (uscal.r == q__1.r && uscal.i == q__1.i) { - -/* - Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) - was not used to scale the dotproduct. -*/ - - i__3 = j; - i__4 = j; - q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i - - csumj.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - i__3 = j; - xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] - ), dabs(r__2)); - if (nounit) { - r_cnjg(&q__2, &a[j + j * a_dim1]); - q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; - tjjs.r = q__1.r, tjjs.i = q__1.i; - } else { - tjjs.r = tscal, tjjs.i = 0.f; - if (tscal == 1.f) { - goto L185; - } - } - -/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ - - tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), - dabs(r__2)); - if (tjj > smlnum) { - -/* abs(A(j,j)) > SMLNUM: */ - - if (tjj < 1.f) { - if (xj > tjj * bignum) { - -/* Scale X by 1/abs(x(j)). 
*/ - - rec = 1.f / xj; - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - } - i__3 = j; - cladiv_(&q__1, &x[j], &tjjs); - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } else if (tjj > 0.f) { - -/* 0 < abs(A(j,j)) <= SMLNUM: */ - - if (xj > tjj * bignum) { - -/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ - - rec = tjj * bignum / xj; - csscal_(n, &rec, &x[1], &c__1); - *scale *= rec; - xmax *= rec; - } - i__3 = j; - cladiv_(&q__1, &x[j], &tjjs); - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } else { - -/* - A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and - scale = 0 and compute a solution to A**H *x = 0. -*/ - - i__3 = *n; - for (i__ = 1; i__ <= i__3; ++i__) { - i__4 = i__; - x[i__4].r = 0.f, x[i__4].i = 0.f; -/* L180: */ - } - i__3 = j; - x[i__3].r = 1.f, x[i__3].i = 0.f; - *scale = 0.f; - xmax = 0.f; - } -L185: - ; - } else { - -/* - Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot - product has already been divided by 1/A(j,j). -*/ - - i__3 = j; - cladiv_(&q__2, &x[j], &tjjs); - q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i; - x[i__3].r = q__1.r, x[i__3].i = q__1.i; - } -/* Computing MAX */ - i__3 = j; - r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = - r_imag(&x[j]), dabs(r__2)); - xmax = dmax(r__3,r__4); -/* L190: */ - } - } - *scale /= tscal; - } - -/* Scale the column norms by 1/TSCAL for return. 
*/ - - if (tscal != 1.f) { - r__1 = 1.f / tscal; - sscal_(n, &r__1, &cnorm[1], &c__1); - } - - return 0; - -/* End of CLATRS */ - -} /* clatrs_ */ - -/* Subroutine */ int clauu2_(char *uplo, integer *n, complex *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1; - complex q__1; - - /* Local variables */ - static integer i__; - static real aii; - extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer - *, complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *); - static logical upper; - extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), - csscal_(integer *, real *, complex *, integer *), xerbla_(char *, - integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLAUU2 computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the unblocked form of the algorithm, calling Level 2 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the triangular factor U or L. 
- On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLAUU2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (upper) { - -/* Compute the product U * U'. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - aii = a[i__2].r; - if (i__ < *n) { - i__2 = i__ + i__ * a_dim1; - i__3 = *n - i__; - cdotc_(&q__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &a[ - i__ + (i__ + 1) * a_dim1], lda); - r__1 = aii * aii + q__1.r; - a[i__2].r = r__1, a[i__2].i = 0.f; - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__; - q__1.r = aii, q__1.i = 0.f; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - q__1, &a[i__ * a_dim1 + 1], &c__1); - i__2 = *n - i__; - clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - } else { - csscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); - } -/* L10: */ - } - - } else { - -/* Compute the product L' * L. 
*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - aii = a[i__2].r; - if (i__ < *n) { - i__2 = i__ + i__ * a_dim1; - i__3 = *n - i__; - cdotc_(&q__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[ - i__ + 1 + i__ * a_dim1], &c__1); - r__1 = aii * aii + q__1.r; - a[i__2].r = r__1, a[i__2].i = 0.f; - i__2 = i__ - 1; - clacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = *n - i__; - i__3 = i__ - 1; - q__1.r = aii, q__1.i = 0.f; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + - 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - q__1, &a[i__ + a_dim1], lda); - i__2 = i__ - 1; - clacgv_(&i__2, &a[i__ + a_dim1], lda); - } else { - csscal_(&i__, &aii, &a[i__ + a_dim1], lda); - } -/* L20: */ - } - } - - return 0; - -/* End of CLAUU2 */ - -} /* clauu2_ */ - -/* Subroutine */ int clauum_(char *uplo, integer *n, complex *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, ib, nb; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *), cherk_(char *, - char *, integer *, integer *, real *, complex *, integer *, real * - , complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *); - static logical upper; - extern /* Subroutine */ int clauu2_(char *, integer *, complex *, integer - *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CLAUUM computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the blocked form of the algorithm, calling Level 3 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CLAUUM", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine the block size for this environment. 
*/ - - nb = ilaenv_(&c__1, "CLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - - if ((nb <= 1) || (nb >= *n)) { - -/* Use unblocked code */ - - clauu2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute the product U * U'. */ - - i__1 = *n; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - ctrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", & - i__3, &ib, &c_b56, &a[i__ + i__ * a_dim1], lda, &a[ - i__ * a_dim1 + 1], lda); - clauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - cgemm_("No transpose", "Conjugate transpose", &i__3, &ib, - &i__4, &c_b56, &a[(i__ + ib) * a_dim1 + 1], lda, & - a[i__ + (i__ + ib) * a_dim1], lda, &c_b56, &a[i__ - * a_dim1 + 1], lda); - i__3 = *n - i__ - ib + 1; - cherk_("Upper", "No transpose", &ib, &i__3, &c_b1011, &a[ - i__ + (i__ + ib) * a_dim1], lda, &c_b1011, &a[i__ - + i__ * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Compute the product L' * L. */ - - i__2 = *n; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - ctrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", & - ib, &i__3, &c_b56, &a[i__ + i__ * a_dim1], lda, &a[ - i__ + a_dim1], lda); - clauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - cgemm_("Conjugate transpose", "No transpose", &ib, &i__3, - &i__4, &c_b56, &a[i__ + ib + i__ * a_dim1], lda, & - a[i__ + ib + a_dim1], lda, &c_b56, &a[i__ + - a_dim1], lda); - i__3 = *n - i__ - ib + 1; - cherk_("Lower", "Conjugate transpose", &ib, &i__3, & - c_b1011, &a[i__ + ib + i__ * a_dim1], lda, & - c_b1011, &a[i__ + i__ * a_dim1], lda); - } -/* L20: */ - } - } - } - - return 0; - -/* End of CLAUUM */ - -} /* clauum_ */ - -/* Subroutine */ int cpotf2_(char *uplo, integer *n, complex *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1; - complex q__1, q__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer j; - static real ajj; - extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer - *, complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *); - static logical upper; - extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), - csscal_(integer *, real *, complex *, integer *), xerbla_(char *, - integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CPOTF2 computes the Cholesky factorization of a complex Hermitian - positive definite matrix A. 
- - The factorization has the form - A = U' * U , if UPLO = 'U', or - A = L * L', if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. - - This is the unblocked version of the algorithm, calling Level 2 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - Hermitian matrix A is stored. - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - n by n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U'*U or A = L*L'. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, the leading minor of order k is not - positive definite, and the factorization could not be - completed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CPOTF2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (upper) { - -/* Compute the Cholesky factorization A = U'*U. 
*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute U(J,J) and test for non-positive-definiteness. */ - - i__2 = j + j * a_dim1; - r__1 = a[i__2].r; - i__3 = j - 1; - cdotc_(&q__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1] - , &c__1); - q__1.r = r__1 - q__2.r, q__1.i = -q__2.i; - ajj = q__1.r; - if (ajj <= 0.f) { - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.f; - goto L30; - } - ajj = sqrt(ajj); - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.f; - -/* Compute elements J+1:N of row J. */ - - if (j < *n) { - i__2 = j - 1; - clacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); - i__2 = j - 1; - i__3 = *n - j; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("Transpose", &i__2, &i__3, &q__1, &a[(j + 1) * a_dim1 - + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b56, &a[j + ( - j + 1) * a_dim1], lda); - i__2 = j - 1; - clacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); - i__2 = *n - j; - r__1 = 1.f / ajj; - csscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Compute the Cholesky factorization A = L*L'. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute L(J,J) and test for non-positive-definiteness. */ - - i__2 = j + j * a_dim1; - r__1 = a[i__2].r; - i__3 = j - 1; - cdotc_(&q__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); - q__1.r = r__1 - q__2.r, q__1.i = -q__2.i; - ajj = q__1.r; - if (ajj <= 0.f) { - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.f; - goto L30; - } - ajj = sqrt(ajj); - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.f; - -/* Compute elements J+1:N of column J. 
*/ - - if (j < *n) { - i__2 = j - 1; - clacgv_(&i__2, &a[j + a_dim1], lda); - i__2 = *n - j; - i__3 = j - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[j + 1 + a_dim1] - , lda, &a[j + a_dim1], lda, &c_b56, &a[j + 1 + j * - a_dim1], &c__1); - i__2 = j - 1; - clacgv_(&i__2, &a[j + a_dim1], lda); - i__2 = *n - j; - r__1 = 1.f / ajj; - csscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); - } -/* L20: */ - } - } - goto L40; - -L30: - *info = j; - -L40: - return 0; - -/* End of CPOTF2 */ - -} /* cpotf2_ */ - -/* Subroutine */ int cpotrf_(char *uplo, integer *n, complex *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - complex q__1; - - /* Local variables */ - static integer j, jb, nb; - extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, - integer *, complex *, complex *, integer *, complex *, integer *, - complex *, complex *, integer *), cherk_(char *, - char *, integer *, integer *, real *, complex *, integer *, real * - , complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *); - static logical upper; - extern /* Subroutine */ int cpotf2_(char *, integer *, complex *, integer - *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CPOTRF computes the Cholesky factorization of a complex Hermitian - positive definite matrix A. 
- - The factorization has the form - A = U**H * U, if UPLO = 'U', or - A = L * L**H, if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. - - This is the block version of the algorithm, calling Level 3 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U**H*U or A = L*L**H. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the leading minor of order i is not - positive definite, and the factorization could not be - completed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CPOTRF", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine the block size for this environment. 
*/ - - nb = ilaenv_(&c__1, "CPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - if ((nb <= 1) || (nb >= *n)) { - -/* Use unblocked code. */ - - cpotf2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code. */ - - if (upper) { - -/* Compute the Cholesky factorization A = U'*U. */ - - i__1 = *n; - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - cherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1290, & - a[j * a_dim1 + 1], lda, &c_b1011, &a[j + j * a_dim1], - lda); - cpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block row. */ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("Conjugate transpose", "No transpose", &jb, &i__3, - &i__4, &q__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) - * a_dim1 + 1], lda, &c_b56, &a[j + (j + jb) * - a_dim1], lda); - i__3 = *n - j - jb + 1; - ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", - &jb, &i__3, &c_b56, &a[j + j * a_dim1], lda, &a[ - j + (j + jb) * a_dim1], lda); - } -/* L10: */ - } - - } else { - -/* Compute the Cholesky factorization A = L*L'. */ - - i__2 = *n; - i__1 = nb; - for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - cherk_("Lower", "No transpose", &jb, &i__3, &c_b1290, &a[j + - a_dim1], lda, &c_b1011, &a[j + j * a_dim1], lda); - cpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block column. 
*/ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", &i__3, &jb, - &i__4, &q__1, &a[j + jb + a_dim1], lda, &a[j + - a_dim1], lda, &c_b56, &a[j + jb + j * a_dim1], - lda); - i__3 = *n - j - jb + 1; - ctrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" - , &i__3, &jb, &c_b56, &a[j + j * a_dim1], lda, &a[ - j + jb + j * a_dim1], lda); - } -/* L20: */ - } - } - } - goto L40; - -L30: - *info = *info + j - 1; - -L40: - return 0; - -/* End of CPOTRF */ - -} /* cpotrf_ */ - -/* Subroutine */ int cpotri_(char *uplo, integer *n, complex *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *), clauum_( - char *, integer *, complex *, integer *, integer *), - ctrtri_(char *, char *, integer *, complex *, integer *, integer * - ); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - CPOTRI computes the inverse of a complex Hermitian positive definite - matrix A using the Cholesky factorization A = U**H*U or A = L*L**H - computed by CPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the triangular factor U or L from the Cholesky - factorization A = U**H*U or A = L*L**H, as computed by - CPOTRF. - On exit, the upper or lower triangle of the (Hermitian) - inverse of A, overwriting the input factor U or L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). 
- - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the (i,i) element of the factor U or L is - zero, and the inverse could not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CPOTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Invert the triangular Cholesky factor U or L. */ - - ctrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); - if (*info > 0) { - return 0; - } - -/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ - - clauum_(uplo, n, &a[a_offset], lda, info); - - return 0; - -/* End of CPOTRI */ - -} /* cpotri_ */ - -/* Subroutine */ int cpotrs_(char *uplo, integer *n, integer *nrhs, complex * - a, integer *lda, complex *b, integer *ldb, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CPOTRS solves a system of linear equations A*X = B with a Hermitian - positive definite matrix A using the Cholesky factorization - A = U**H*U or A = L*L**H computed by CPOTRF. 
- - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) COMPLEX array, dimension (LDA,N) - The triangular factor U or L from the Cholesky factorization - A = U**H*U or A = L*L**H, as computed by CPOTRF. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - B (input/output) COMPLEX array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CPOTRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if ((*n == 0) || (*nrhs == 0)) { - return 0; - } - - if (upper) { - -/* - Solve A*X = B where A = U'*U. - - Solve U'*X = B, overwriting B with X. -*/ - - ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, & - c_b56, &a[a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. 
*/ - - ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b56, & - a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A*X = B where A = L*L'. - - Solve L*X = B, overwriting B with X. -*/ - - ctrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b56, & - a[a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. */ - - ctrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, & - c_b56, &a[a_offset], lda, &b[b_offset], ldb); - } - - return 0; - -/* End of CPOTRS */ - -} /* cpotrs_ */ - -/* Subroutine */ int csrot_(integer *n, complex *cx, integer *incx, complex * - cy, integer *incy, real *c__, real *s) -{ - /* System generated locals */ - integer i__1, i__2, i__3, i__4; - complex q__1, q__2, q__3; - - /* Local variables */ - static integer i__, ix, iy; - static complex ctemp; - - -/* - applies a plane rotation, where the cos and sin (c and s) are real - and the vectors cx and cy are complex. - jack dongarra, linpack, 3/11/78. 
- - ===================================================================== -*/ - - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ix; - q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; - i__3 = iy; - q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; - i__2 = iy; - i__3 = iy; - q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; - i__4 = ix; - q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; - cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; - i__2 = ix; - cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; - i__3 = i__; - q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; - i__2 = i__; - i__3 = i__; - q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; - i__4 = i__; - q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; - q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; - cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; - i__2 = i__; - cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; -/* L30: */ - } - return 0; -} /* csrot_ */ - -/* Subroutine */ int cstedc_(char *compz, integer *n, real *d__, real *e, - complex *z__, integer *ldz, complex *work, integer *lwork, real * - rwork, integer *lrwork, integer *iwork, integer *liwork, integer * - 
info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2, i__3, i__4; - real r__1, r__2; - - /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j, k, m; - static real p; - static integer ii, ll, end, lgn; - static real eps, tiny; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cswap_(integer *, complex *, integer *, - complex *, integer *); - static integer lwmin; - extern /* Subroutine */ int claed0_(integer *, integer *, real *, real *, - complex *, integer *, complex *, integer *, real *, integer *, - integer *); - static integer start; - extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, - integer *, real *, integer *, complex *, integer *, real *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex - *, integer *, complex *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *), sstedc_(char *, integer *, real *, real *, real *, - integer *, real *, integer *, integer *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, - real *, integer *); - static integer liwmin, icompz; - extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, - complex *, integer *, real *, integer *); - static real orgnrm; - extern doublereal slanst_(char *, integer *, real *, real *); - extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); - static integer lrwmin; - static logical lquery; - static integer smlsiz; - extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, - real *, integer *, real *, integer *); - - -/* - -- LAPACK routine 
(version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CSTEDC computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the divide and conquer method. - The eigenvectors of a full or band complex Hermitian matrix can also - be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this - matrix to tridiagonal form. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See SLAED3 for details. - - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'I': Compute eigenvectors of tridiagonal matrix also. - = 'V': Compute eigenvectors of original Hermitian matrix - also. On entry, Z contains the unitary matrix used - to reduce the original matrix to tridiagonal form. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) REAL array, dimension (N-1) - On entry, the subdiagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. - - Z (input/output) COMPLEX array, dimension (LDZ,N) - On entry, if COMPZ = 'V', then Z contains the unitary - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original Hermitian matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. 
- If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1. - If eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. - If COMPZ = 'V' and N > 1, LWORK must be at least N*N. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - RWORK (workspace/output) REAL array, - dimension (LRWORK) - On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. - - LRWORK (input) INTEGER - The dimension of the array RWORK. - If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. - If COMPZ = 'V' and N > 1, LRWORK must be at least - 1 + 3*N + 2*N*lg N + 3*N**2 , - where lg( N ) = smallest integer k such - that 2**k >= N. - If COMPZ = 'I' and N > 1, LRWORK must be at least - 1 + 4*N + 2*N**2 . - - If LRWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the RWORK array, - returns this value as the first entry of the RWORK array, and - no error message related to LRWORK is issued by XERBLA. - - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - - LIWORK (input) INTEGER - The dimension of the array IWORK. - If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. - If COMPZ = 'V' or N > 1, LIWORK must be at least - 6 + 6*N + 5*N*lg N. - If COMPZ = 'I' or N > 1, LIWORK must be at least - 3 + 5*N . 
- - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - lquery = ((*lwork == -1) || (*lrwork == -1)) || (*liwork == -1); - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if ((*n <= 1) || (icompz <= 0)) { - lwmin = 1; - liwmin = 1; - lrwmin = 1; - } else { - lgn = (integer) (log((real) (*n)) / log(2.f)); - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (icompz == 1) { - lwmin = *n * *n; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = *n * 3 + 1 + ((*n) << (1)) * lgn + i__1 * i__1 * 3; - liwmin = *n * 6 + 6 + *n * 5 * lgn; - } else if (icompz == 2) { - lwmin = 1; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = ((*n) << (2)) + 1 + ((i__1 * i__1) << (1)); - liwmin = *n * 5 + 3; - } - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) { - *info = -6; - } else if (*lwork < lwmin && ! 
lquery) { - *info = -8; - } else if (*lrwork < lrwmin && ! lquery) { - *info = -10; - } else if (*liwork < liwmin && ! lquery) { - *info = -12; - } - - if (*info == 0) { - work[1].r = (real) lwmin, work[1].i = 0.f; - rwork[1] = (real) lrwmin; - iwork[1] = liwmin; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CSTEDC", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*n == 1) { - if (icompz != 0) { - i__1 = z_dim1 + 1; - z__[i__1].r = 1.f, z__[i__1].i = 0.f; - } - return 0; - } - - smlsiz = ilaenv_(&c__9, "CSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - If the following conditional clause is removed, then the routine - will use the Divide and Conquer routine to compute only the - eigenvalues, which requires (3N + 3N**2) real workspace and - (2 + 5N + 2N lg(N)) integer workspace. - Since on many architectures SSTERF is much faster than any other - algorithm for finding eigenvalues only, it is used here - as the default. - - If COMPZ = 'N', use SSTERF to compute the eigenvalues. -*/ - - if (icompz == 0) { - ssterf_(n, &d__[1], &e[1], info); - return 0; - } - -/* - If N is smaller than the minimum divide size (SMLSIZ+1), then - solve the problem with another solver. -*/ - - if (*n <= smlsiz) { - if (icompz == 0) { - ssterf_(n, &d__[1], &e[1], info); - return 0; - } else if (icompz == 2) { - csteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; - } else { - csteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; - } - } - -/* If COMPZ = 'I', we simply call SSTEDC instead. 
*/ - - if (icompz == 2) { - slaset_("Full", n, n, &c_b320, &c_b1011, &rwork[1], n); - ll = *n * *n + 1; - i__1 = *lrwork - ll + 1; - sstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & - iwork[1], liwork, info); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * z_dim1; - i__4 = (j - 1) * *n + i__; - z__[i__3].r = rwork[i__4], z__[i__3].i = 0.f; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* - From now on, only option left to be handled is COMPZ = 'V', - i.e. ICOMPZ = 1. - - Scale. -*/ - - orgnrm = slanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.f) { - return 0; - } - - eps = slamch_("Epsilon"); - - start = 1; - -/* while ( START <= N ) */ - -L30: - if (start <= *n) { - -/* - Let END be the position of the next subdiagonal entry such that - E( END ) <= TINY or END = N if no such subdiagonal exists. The - matrix identified by the elements between START and END - constitutes an independent sub-problem. -*/ - - end = start; -L40: - if (end < *n) { - tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 = - d__[end + 1], dabs(r__2))); - if ((r__1 = e[end], dabs(r__1)) > tiny) { - ++end; - goto L40; - } - } - -/* (Sub) Problem determined. Compute its size and solve it. */ - - m = end - start + 1; - if (m > smlsiz) { - *info = smlsiz; - -/* Scale. */ - - orgnrm = slanst_("M", &m, &d__[start], &e[start]); - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &m, &c__1, &d__[ - start], &m, info); - i__1 = m - 1; - i__2 = m - 1; - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &i__1, &c__1, &e[ - start], &i__2, info); - - claed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1], - ldz, &work[1], n, &rwork[1], &iwork[1], info); - if (*info > 0) { - *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m - + 1) + start - 1; - return 0; - } - -/* Scale back. 
*/ - - slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, &m, &c__1, &d__[ - start], &m, info); - - } else { - ssteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m * - m + 1], info); - clacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & - work[1], n, &rwork[m * m + 1]); - clacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz); - if (*info > 0) { - *info = start * (*n + 1) + end; - return 0; - } - } - - start = end + 1; - goto L30; - } - -/* - endwhile - - If the problem split any number of times, then the eigenvalues - will not be properly ordered. Here we permute the eigenvalues - (and the associated eigenvectors) into ascending order. -*/ - - if (m != *n) { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L50: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } -/* L60: */ - } - } - - work[1].r = (real) lwmin, work[1].i = 0.f; - rwork[1] = (real) lrwmin; - iwork[1] = liwmin; - - return 0; - -/* End of CSTEDC */ - -} /* cstedc_ */ - -/* Subroutine */ int csteqr_(char *compz, integer *n, real *d__, real *e, - complex *z__, integer *ldz, real *work, integer *info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2; - real r__1, r__2; - - /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); - - /* Local variables */ - static real b, c__, f, g; - static integer i__, j, k, l, m; - static real p, r__, s; - static integer l1, ii, mm, lm1, mm1, nm1; - static real rt1, rt2, eps; - static integer lsv; - static real tst, eps2; - static integer lend, jtot; - extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) - ; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int clasr_(char 
*, char *, char *, integer *, - integer *, real *, real *, complex *, integer *); - static real anorm; - extern /* Subroutine */ int cswap_(integer *, complex *, integer *, - complex *, integer *); - static integer lendm1, lendp1; - extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real * - , real *, real *); - extern doublereal slapy2_(real *, real *); - static integer iscale; - extern doublereal slamch_(char *); - extern /* Subroutine */ int claset_(char *, integer *, integer *, complex - *, complex *, complex *, integer *); - static real safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - static real safmax; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *); - static integer lendsv; - extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * - ); - static real ssfmin; - static integer nmaxit, icompz; - static real ssfmax; - extern doublereal slanst_(char *, integer *, real *, real *); - extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CSTEQR computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the implicit QL or QR method. - The eigenvectors of a full or band complex Hermitian matrix can also - be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this - matrix to tridiagonal form. - - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'V': Compute eigenvalues and eigenvectors of the original - Hermitian matrix. On entry, Z must contain the - unitary matrix used to reduce the original matrix - to tridiagonal form. - = 'I': Compute eigenvalues and eigenvectors of the - tridiagonal matrix. 
Z is initialized to the identity - matrix. - - N (input) INTEGER - The order of the matrix. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) REAL array, dimension (N-1) - On entry, the (n-1) subdiagonal elements of the tridiagonal - matrix. - On exit, E has been destroyed. - - Z (input/output) COMPLEX array, dimension (LDZ, N) - On entry, if COMPZ = 'V', then Z contains the unitary - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original Hermitian matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1, and if - eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace) REAL array, dimension (max(1,2*N-2)) - If COMPZ = 'N', then WORK is not referenced. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm has failed to find all the eigenvalues in - a total of 30*N iterations; if INFO = i, then i - elements of E have not converged to zero; on exit, D - and E contain the elements of a symmetric tridiagonal - matrix which is unitarily similar to the original - matrix. - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - - /* Function Body */ - *info = 0; - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) { - *info = -6; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CSTEQR", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (*n == 1) { - if (icompz == 2) { - i__1 = z_dim1 + 1; - z__[i__1].r = 1.f, z__[i__1].i = 0.f; - } - return 0; - } - -/* Determine the unit roundoff and over/underflow thresholds. */ - - eps = slamch_("E"); -/* Computing 2nd power */ - r__1 = eps; - eps2 = r__1 * r__1; - safmin = slamch_("S"); - safmax = 1.f / safmin; - ssfmax = sqrt(safmax) / 3.f; - ssfmin = sqrt(safmin) / eps2; - -/* - Compute the eigenvalues and eigenvectors of the tridiagonal - matrix. -*/ - - if (icompz == 2) { - claset_("Full", n, n, &c_b55, &c_b56, &z__[z_offset], ldz); - } - - nmaxit = *n * 30; - jtot = 0; - -/* - Determine where the matrix splits and choose QL or QR iteration - for each block, according to whether top or bottom diagonal - element is smaller. 
-*/ - - l1 = 1; - nm1 = *n - 1; - -L10: - if (l1 > *n) { - goto L160; - } - if (l1 > 1) { - e[l1 - 1] = 0.f; - } - if (l1 <= nm1) { - i__1 = nm1; - for (m = l1; m <= i__1; ++m) { - tst = (r__1 = e[m], dabs(r__1)); - if (tst == 0.f) { - goto L30; - } - if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m - + 1], dabs(r__2))) * eps) { - e[m] = 0.f; - goto L30; - } -/* L20: */ - } - } - m = *n; - -L30: - l = l1; - lsv = l; - lend = m; - lendsv = lend; - l1 = m + 1; - if (lend == l) { - goto L10; - } - -/* Scale submatrix in rows and columns L to LEND */ - - i__1 = lend - l + 1; - anorm = slanst_("I", &i__1, &d__[l], &e[l]); - iscale = 0; - if (anorm == 0.f) { - goto L10; - } - if (anorm > ssfmax) { - iscale = 1; - i__1 = lend - l + 1; - slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, - info); - } else if (anorm < ssfmin) { - iscale = 2; - i__1 = lend - l + 1; - slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, - info); - } - -/* Choose between QL and QR iteration */ - - if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { - lend = lsv; - l = lendsv; - } - - if (lend > l) { - -/* - QL Iteration - - Look for small subdiagonal element. -*/ - -L40: - if (l != lend) { - lendm1 = lend - 1; - i__1 = lendm1; - for (m = l; m <= i__1; ++m) { -/* Computing 2nd power */ - r__2 = (r__1 = e[m], dabs(r__1)); - tst = r__2 * r__2; - if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m - + 1], dabs(r__2)) + safmin) { - goto L60; - } -/* L50: */ - } - } - - m = lend; - -L60: - if (m < lend) { - e[m] = 0.f; - } - p = d__[l]; - if (m == l) { - goto L80; - } - -/* - If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 - to compute its eigensystem. 
-*/ - - if (m == l + 1) { - if (icompz > 0) { - slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); - work[l] = c__; - work[*n - 1 + l] = s; - clasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & - z__[l * z_dim1 + 1], ldz); - } else { - slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); - } - d__[l] = rt1; - d__[l + 1] = rt2; - e[l] = 0.f; - l += 2; - if (l <= lend) { - goto L40; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l + 1] - p) / (e[l] * 2.f); - r__ = slapy2_(&g, &c_b1011); - g = d__[m] - p + e[l] / (g + r_sign(&r__, &g)); - - s = 1.f; - c__ = 1.f; - p = 0.f; - -/* Inner loop */ - - mm1 = m - 1; - i__1 = l; - for (i__ = mm1; i__ >= i__1; --i__) { - f = s * e[i__]; - b = c__ * e[i__]; - slartg_(&g, &f, &c__, &s, &r__); - if (i__ != m - 1) { - e[i__ + 1] = r__; - } - g = d__[i__ + 1] - p; - r__ = (d__[i__] - g) * s + c__ * 2.f * b; - p = s * r__; - d__[i__ + 1] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = -s; - } - -/* L70: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = m - l + 1; - clasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[l] = g; - goto L40; - -/* Eigenvalue found. */ - -L80: - d__[l] = p; - - ++l; - if (l <= lend) { - goto L40; - } - goto L140; - - } else { - -/* - QR Iteration - - Look for small superdiagonal element. 
-*/ - -L90: - if (l != lend) { - lendp1 = lend + 1; - i__1 = lendp1; - for (m = l; m >= i__1; --m) { -/* Computing 2nd power */ - r__2 = (r__1 = e[m - 1], dabs(r__1)); - tst = r__2 * r__2; - if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m - - 1], dabs(r__2)) + safmin) { - goto L110; - } -/* L100: */ - } - } - - m = lend; - -L110: - if (m > lend) { - e[m - 1] = 0.f; - } - p = d__[l]; - if (m == l) { - goto L130; - } - -/* - If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 - to compute its eigensystem. -*/ - - if (m == l - 1) { - if (icompz > 0) { - slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) - ; - work[m] = c__; - work[*n - 1 + m] = s; - clasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & - z__[(l - 1) * z_dim1 + 1], ldz); - } else { - slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); - } - d__[l - 1] = rt1; - d__[l] = rt2; - e[l - 1] = 0.f; - l += -2; - if (l >= lend) { - goto L90; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l - 1] - p) / (e[l - 1] * 2.f); - r__ = slapy2_(&g, &c_b1011); - g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g)); - - s = 1.f; - c__ = 1.f; - p = 0.f; - -/* Inner loop */ - - lm1 = l - 1; - i__1 = lm1; - for (i__ = m; i__ <= i__1; ++i__) { - f = s * e[i__]; - b = c__ * e[i__]; - slartg_(&g, &f, &c__, &s, &r__); - if (i__ != m) { - e[i__ - 1] = r__; - } - g = d__[i__] - p; - r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b; - p = s * r__; - d__[i__] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = s; - } - -/* L120: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = l - m + 1; - clasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[lm1] = g; - goto L90; - -/* Eigenvalue found. 
*/ - -L130: - d__[l] = p; - - --l; - if (l >= lend) { - goto L90; - } - goto L140; - - } - -/* Undo scaling if necessary */ - -L140: - if (iscale == 1) { - i__1 = lendsv - lsv + 1; - slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } else if (iscale == 2) { - i__1 = lendsv - lsv + 1; - slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } - -/* - Check for no convergence to an eigenvalue after a total - of N*MAXIT iterations. -*/ - - if (jtot == nmaxit) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.f) { - ++(*info); - } -/* L150: */ - } - return 0; - } - goto L10; - -/* Order eigenvalues and eigenvectors. */ - -L160: - if (icompz == 0) { - -/* Use Quick Sort */ - - slasrt_("I", n, &d__[1], info); - - } else { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L170: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } -/* L180: */ - } - } - return 0; - -/* End of CSTEQR */ - -} /* csteqr_ */ - -/* Subroutine */ int ctrevc_(char *side, char *howmny, logical *select, - integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, - complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, - real *rwork, integer *info) -{ - /* System generated locals */ - integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4, i__5; - real r__1, r__2, r__3; - complex q__1, q__2; - - /* Builtin functions */ - double r_imag(complex *); - void r_cnjg(complex 
*, complex *); - - /* Local variables */ - static integer i__, j, k, ii, ki, is; - static real ulp; - static logical allv; - static real unfl, ovfl, smin; - static logical over; - static real scale; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *); - static real remax; - extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, - complex *, integer *); - static logical leftv, bothv, somev; - extern /* Subroutine */ int slabad_(real *, real *); - extern integer icamax_(integer *, complex *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *), xerbla_(char *, integer *), clatrs_(char *, char *, - char *, char *, integer *, complex *, integer *, complex *, real * - , real *, integer *); - extern doublereal scasum_(integer *, complex *, integer *); - static logical rightv; - static real smlnum; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CTREVC computes some or all of the right and/or left eigenvectors of - a complex upper triangular matrix T. - - The right eigenvector x and the left eigenvector y of T corresponding - to an eigenvalue w are defined by: - - T*x = w*x, y'*T = w*y' - - where y' denotes the conjugate transpose of the vector y. - - If all eigenvectors are requested, the routine may either return the - matrices X and/or Y of right or left eigenvectors of T, or the - products Q*X and/or Q*Y, where Q is an input unitary - matrix. If T was obtained from the Schur factorization of an - original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of - right or left eigenvectors of A. 
- - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'R': compute right eigenvectors only; - = 'L': compute left eigenvectors only; - = 'B': compute both right and left eigenvectors. - - HOWMNY (input) CHARACTER*1 - = 'A': compute all right and/or left eigenvectors; - = 'B': compute all right and/or left eigenvectors, - and backtransform them using the input matrices - supplied in VR and/or VL; - = 'S': compute selected right and/or left eigenvectors, - specified by the logical array SELECT. - - SELECT (input) LOGICAL array, dimension (N) - If HOWMNY = 'S', SELECT specifies the eigenvectors to be - computed. - If HOWMNY = 'A' or 'B', SELECT is not referenced. - To select the eigenvector corresponding to the j-th - eigenvalue, SELECT(j) must be set to .TRUE.. - - N (input) INTEGER - The order of the matrix T. N >= 0. - - T (input/output) COMPLEX array, dimension (LDT,N) - The upper triangular matrix T. T is modified, but restored - on exit. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= max(1,N). - - VL (input/output) COMPLEX array, dimension (LDVL,MM) - On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must - contain an N-by-N matrix Q (usually the unitary matrix Q of - Schur vectors returned by CHSEQR). - On exit, if SIDE = 'L' or 'B', VL contains: - if HOWMNY = 'A', the matrix Y of left eigenvectors of T; - VL is lower triangular. The i-th column - VL(i) of VL is the eigenvector corresponding - to T(i,i). - if HOWMNY = 'B', the matrix Q*Y; - if HOWMNY = 'S', the left eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VL, in the same order as their - eigenvalues. - If SIDE = 'R', VL is not referenced. - - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= max(1,N) if - SIDE = 'L' or 'B'; LDVL >= 1 otherwise. 
- - VR (input/output) COMPLEX array, dimension (LDVR,MM) - On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must - contain an N-by-N matrix Q (usually the unitary matrix Q of - Schur vectors returned by CHSEQR). - On exit, if SIDE = 'R' or 'B', VR contains: - if HOWMNY = 'A', the matrix X of right eigenvectors of T; - VR is upper triangular. The i-th column - VR(i) of VR is the eigenvector corresponding - to T(i,i). - if HOWMNY = 'B', the matrix Q*X; - if HOWMNY = 'S', the right eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VR, in the same order as their - eigenvalues. - If SIDE = 'L', VR is not referenced. - - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= max(1,N) if - SIDE = 'R' or 'B'; LDVR >= 1 otherwise. - - MM (input) INTEGER - The number of columns in the arrays VL and/or VR. MM >= M. - - M (output) INTEGER - The number of columns in the arrays VL and/or VR actually - used to store the eigenvectors. If HOWMNY = 'A' or 'B', M - is set to N. Each selected eigenvector occupies one - column. - - WORK (workspace) COMPLEX array, dimension (2*N) - - RWORK (workspace) REAL array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The algorithm used in this program is basically backward (forward) - substitution, with scaling to make the code robust against - possible overflow. - - Each eigenvector is normalized so that the element of largest - magnitude has magnitude 1; here the magnitude of a complex number - (x,y) is taken to be |x| + |y|. 
- - ===================================================================== - - - Decode and test the input parameters -*/ - - /* Parameter adjustments */ - --select; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - --rwork; - - /* Function Body */ - bothv = lsame_(side, "B"); - rightv = (lsame_(side, "R")) || (bothv); - leftv = (lsame_(side, "L")) || (bothv); - - allv = lsame_(howmny, "A"); - over = lsame_(howmny, "B"); - somev = lsame_(howmny, "S"); - -/* - Set M to the number of columns required to store the selected - eigenvectors. -*/ - - if (somev) { - *m = 0; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (select[j]) { - ++(*m); - } -/* L10: */ - } - } else { - *m = *n; - } - - *info = 0; - if (! rightv && ! leftv) { - *info = -1; - } else if (! allv && ! over && ! somev) { - *info = -2; - } else if (*n < 0) { - *info = -4; - } else if (*ldt < max(1,*n)) { - *info = -6; - } else if ((*ldvl < 1) || (leftv && *ldvl < *n)) { - *info = -8; - } else if ((*ldvr < 1) || (rightv && *ldvr < *n)) { - *info = -10; - } else if (*mm < *m) { - *info = -11; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CTREVC", &i__1); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* Set the constants to control overflow. */ - - unfl = slamch_("Safe minimum"); - ovfl = 1.f / unfl; - slabad_(&unfl, &ovfl); - ulp = slamch_("Precision"); - smlnum = unfl * (*n / ulp); - -/* Store the diagonal elements of T in working array WORK. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + *n; - i__3 = i__ + i__ * t_dim1; - work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i; -/* L20: */ - } - -/* - Compute 1-norm of each column of strictly upper triangular - part of T to control overflow in triangular solver. 
-*/ - - rwork[1] = 0.f; - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - i__2 = j - 1; - rwork[j] = scasum_(&i__2, &t[j * t_dim1 + 1], &c__1); -/* L30: */ - } - - if (rightv) { - -/* Compute right eigenvectors. */ - - is = *m; - for (ki = *n; ki >= 1; --ki) { - - if (somev) { - if (! select[ki]) { - goto L80; - } - } -/* Computing MAX */ - i__1 = ki + ki * t_dim1; - r__3 = ulp * ((r__1 = t[i__1].r, dabs(r__1)) + (r__2 = r_imag(&t[ - ki + ki * t_dim1]), dabs(r__2))); - smin = dmax(r__3,smlnum); - - work[1].r = 1.f, work[1].i = 0.f; - -/* Form right-hand side. */ - - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k; - i__3 = k + ki * t_dim1; - q__1.r = -t[i__3].r, q__1.i = -t[i__3].i; - work[i__2].r = q__1.r, work[i__2].i = q__1.i; -/* L40: */ - } - -/* - Solve the triangular system: - (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. -*/ - - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k + k * t_dim1; - i__3 = k + k * t_dim1; - i__4 = ki + ki * t_dim1; - q__1.r = t[i__3].r - t[i__4].r, q__1.i = t[i__3].i - t[i__4] - .i; - t[i__2].r = q__1.r, t[i__2].i = q__1.i; - i__2 = k + k * t_dim1; - if ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k * - t_dim1]), dabs(r__2)) < smin) { - i__3 = k + k * t_dim1; - t[i__3].r = smin, t[i__3].i = 0.f; - } -/* L50: */ - } - - if (ki > 1) { - i__1 = ki - 1; - clatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[ - t_offset], ldt, &work[1], &scale, &rwork[1], info); - i__1 = ki; - work[i__1].r = scale, work[i__1].i = 0.f; - } - -/* Copy the vector x or Q*x to VR and normalize. */ - - if (! 
over) { - ccopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1); - - ii = icamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); - i__1 = ii + is * vr_dim1; - remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 = - r_imag(&vr[ii + is * vr_dim1]), dabs(r__2))); - csscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); - - i__1 = *n; - for (k = ki + 1; k <= i__1; ++k) { - i__2 = k + is * vr_dim1; - vr[i__2].r = 0.f, vr[i__2].i = 0.f; -/* L60: */ - } - } else { - if (ki > 1) { - i__1 = ki - 1; - q__1.r = scale, q__1.i = 0.f; - cgemv_("N", n, &i__1, &c_b56, &vr[vr_offset], ldvr, &work[ - 1], &c__1, &q__1, &vr[ki * vr_dim1 + 1], &c__1); - } - - ii = icamax_(n, &vr[ki * vr_dim1 + 1], &c__1); - i__1 = ii + ki * vr_dim1; - remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 = - r_imag(&vr[ii + ki * vr_dim1]), dabs(r__2))); - csscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); - } - -/* Set back the original diagonal elements of T. */ - - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k + k * t_dim1; - i__3 = k + *n; - t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i; -/* L70: */ - } - - --is; -L80: - ; - } - } - - if (leftv) { - -/* Compute left eigenvectors. */ - - is = 1; - i__1 = *n; - for (ki = 1; ki <= i__1; ++ki) { - - if (somev) { - if (! select[ki]) { - goto L130; - } - } -/* Computing MAX */ - i__2 = ki + ki * t_dim1; - r__3 = ulp * ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[ - ki + ki * t_dim1]), dabs(r__2))); - smin = dmax(r__3,smlnum); - - i__2 = *n; - work[i__2].r = 1.f, work[i__2].i = 0.f; - -/* Form right-hand side. */ - - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k; - r_cnjg(&q__2, &t[ki + k * t_dim1]); - q__1.r = -q__2.r, q__1.i = -q__2.i; - work[i__3].r = q__1.r, work[i__3].i = q__1.i; -/* L90: */ - } - -/* - Solve the triangular system: - (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. 
-*/ - - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k + k * t_dim1; - i__4 = k + k * t_dim1; - i__5 = ki + ki * t_dim1; - q__1.r = t[i__4].r - t[i__5].r, q__1.i = t[i__4].i - t[i__5] - .i; - t[i__3].r = q__1.r, t[i__3].i = q__1.i; - i__3 = k + k * t_dim1; - if ((r__1 = t[i__3].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k * - t_dim1]), dabs(r__2)) < smin) { - i__4 = k + k * t_dim1; - t[i__4].r = smin, t[i__4].i = 0.f; - } -/* L100: */ - } - - if (ki < *n) { - i__2 = *n - ki; - clatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", & - i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki + - 1], &scale, &rwork[1], info); - i__2 = ki; - work[i__2].r = scale, work[i__2].i = 0.f; - } - -/* Copy the vector x or Q*x to VL and normalize. */ - - if (! over) { - i__2 = *n - ki + 1; - ccopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1) - ; - - i__2 = *n - ki + 1; - ii = icamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; - i__2 = ii + is * vl_dim1; - remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 = - r_imag(&vl[ii + is * vl_dim1]), dabs(r__2))); - i__2 = *n - ki + 1; - csscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - - i__2 = ki - 1; - for (k = 1; k <= i__2; ++k) { - i__3 = k + is * vl_dim1; - vl[i__3].r = 0.f, vl[i__3].i = 0.f; -/* L110: */ - } - } else { - if (ki < *n) { - i__2 = *n - ki; - q__1.r = scale, q__1.i = 0.f; - cgemv_("N", n, &i__2, &c_b56, &vl[(ki + 1) * vl_dim1 + 1], - ldvl, &work[ki + 1], &c__1, &q__1, &vl[ki * - vl_dim1 + 1], &c__1); - } - - ii = icamax_(n, &vl[ki * vl_dim1 + 1], &c__1); - i__2 = ii + ki * vl_dim1; - remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 = - r_imag(&vl[ii + ki * vl_dim1]), dabs(r__2))); - csscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - } - -/* Set back the original diagonal elements of T. 
*/ - - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k + k * t_dim1; - i__4 = k + *n; - t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i; -/* L120: */ - } - - ++is; -L130: - ; - } - } - - return 0; - -/* End of CTREVC */ - -} /* ctrevc_ */ - -/* Subroutine */ int ctrti2_(char *uplo, char *diag, integer *n, complex *a, - integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - complex q__1; - - /* Builtin functions */ - void c_div(complex *, complex *, complex *); - - /* Local variables */ - static integer j; - static complex ajj; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *); - extern logical lsame_(char *, char *); - static logical upper; - extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, - complex *, integer *, complex *, integer *), xerbla_(char *, integer *); - static logical nounit; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CTRTI2 computes the inverse of a complex upper or lower triangular - matrix. - - This is the Level 2 BLAS version of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the matrix A is upper or lower triangular. - = 'U': Upper triangular - = 'L': Lower triangular - - DIAG (input) CHARACTER*1 - Specifies whether or not the matrix A is unit triangular. - = 'N': Non-unit triangular - = 'U': Unit triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading n by n upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. 
If UPLO = 'L', the - leading n by n lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - - On exit, the (triangular) inverse of the original matrix, in - the same storage format. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CTRTI2", &i__1); - return 0; - } - - if (upper) { - -/* Compute inverse of upper triangular matrix. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (nounit) { - i__2 = j + j * a_dim1; - c_div(&q__1, &c_b56, &a[j + j * a_dim1]); - a[i__2].r = q__1.r, a[i__2].i = q__1.i; - i__2 = j + j * a_dim1; - q__1.r = -a[i__2].r, q__1.i = -a[i__2].i; - ajj.r = q__1.r, ajj.i = q__1.i; - } else { - q__1.r = -1.f, q__1.i = -0.f; - ajj.r = q__1.r, ajj.i = q__1.i; - } - -/* Compute elements 1:j-1 of j-th column. */ - - i__2 = j - 1; - ctrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & - a[j * a_dim1 + 1], &c__1); - i__2 = j - 1; - cscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); -/* L10: */ - } - } else { - -/* Compute inverse of lower triangular matrix. 
*/ - - for (j = *n; j >= 1; --j) { - if (nounit) { - i__1 = j + j * a_dim1; - c_div(&q__1, &c_b56, &a[j + j * a_dim1]); - a[i__1].r = q__1.r, a[i__1].i = q__1.i; - i__1 = j + j * a_dim1; - q__1.r = -a[i__1].r, q__1.i = -a[i__1].i; - ajj.r = q__1.r, ajj.i = q__1.i; - } else { - q__1.r = -1.f, q__1.i = -0.f; - ajj.r = q__1.r, ajj.i = q__1.i; - } - if (j < *n) { - -/* Compute elements j+1:n of j-th column. */ - - i__1 = *n - j; - ctrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + - 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); - i__1 = *n - j; - cscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); - } -/* L20: */ - } - } - - return 0; - -/* End of CTRTI2 */ - -} /* ctrti2_ */ - -/* Subroutine */ int ctrtri_(char *uplo, char *diag, integer *n, complex *a, - integer *lda, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; - complex q__1; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer j, jb, nb, nn; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, - integer *, integer *, complex *, complex *, integer *, complex *, - integer *), ctrsm_(char *, char *, - char *, char *, integer *, integer *, complex *, complex *, - integer *, complex *, integer *); - static logical upper; - extern /* Subroutine */ int ctrti2_(char *, char *, integer *, complex *, - integer *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical nounit; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CTRTRI computes the inverse of a complex upper or lower triangular - matrix A. - - This is the Level 3 BLAS version of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': A is upper triangular; - = 'L': A is lower triangular. - - DIAG (input) CHARACTER*1 - = 'N': A is non-unit triangular; - = 'U': A is unit triangular. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - On exit, the (triangular) inverse of the original matrix, in - the same storage format. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, A(i,i) is exactly zero. The triangular - matrix is singular and its inverse can not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! 
lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CTRTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Check for singularity if non-unit. */ - - if (nounit) { - i__1 = *n; - for (*info = 1; *info <= i__1; ++(*info)) { - i__2 = *info + *info * a_dim1; - if (a[i__2].r == 0.f && a[i__2].i == 0.f) { - return 0; - } -/* L10: */ - } - *info = 0; - } - -/* - Determine the block size for this environment. - - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = uplo; - i__3[1] = 1, a__1[1] = diag; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - nb = ilaenv_(&c__1, "CTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)2); - if ((nb <= 1) || (nb >= *n)) { - -/* Use unblocked code */ - - ctrti2_(uplo, diag, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute inverse of upper triangular matrix */ - - i__1 = *n; - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *n - j + 1; - jb = min(i__4,i__5); - -/* Compute rows 1:j-1 of current block column */ - - i__4 = j - 1; - ctrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & - c_b56, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); - i__4 = j - 1; - q__1.r = -1.f, q__1.i = -0.f; - ctrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & - q__1, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], - lda); - -/* Compute inverse of current diagonal block */ - - ctrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L20: */ - } - } else { - -/* Compute inverse of lower triangular matrix */ - - nn = (*n - 1) / nb * nb + 1; - i__2 = -nb; - for (j = nn; i__2 < 0 ? 
j >= 1 : j <= 1; j += i__2) { -/* Computing MIN */ - i__1 = nb, i__4 = *n - j + 1; - jb = min(i__1,i__4); - if (j + jb <= *n) { - -/* Compute rows j+jb:n of current block column */ - - i__1 = *n - j - jb + 1; - ctrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, - &c_b56, &a[j + jb + (j + jb) * a_dim1], lda, &a[j - + jb + j * a_dim1], lda); - i__1 = *n - j - jb + 1; - q__1.r = -1.f, q__1.i = -0.f; - ctrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, - &q__1, &a[j + j * a_dim1], lda, &a[j + jb + j * - a_dim1], lda); - } - -/* Compute inverse of current diagonal block */ - - ctrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L30: */ - } - } - } - - return 0; - -/* End of CTRTRI */ - -} /* ctrtri_ */ - -/* Subroutine */ int cung2r_(integer *m, integer *n, integer *k, complex *a, - integer *lda, complex *tau, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - complex q__1; - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), clarf_(char *, integer *, integer *, complex *, - integer *, complex *, complex *, integer *, complex *), - xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CUNG2R generates an m by n complex matrix Q with orthonormal columns, - which is defined as the first n columns of a product of k elementary - reflectors of order m - - Q = H(1) H(2) . . . H(k) - - as returned by CGEQRF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. M >= N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. N >= K >= 0. 
- - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the i-th column must contain the vector which - defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by CGEQRF in the first k columns of its array - argument A. - On exit, the m by n matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEQRF. - - WORK (workspace) COMPLEX array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if ((*n < 0) || (*n > *m)) { - *info = -2; - } else if ((*k < 0) || (*k > *n)) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNG2R", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - return 0; - } - -/* Initialise columns k+1:n to columns of the unit matrix */ - - i__1 = *n; - for (j = *k + 1; j <= i__1; ++j) { - i__2 = *m; - for (l = 1; l <= i__2; ++l) { - i__3 = l + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L10: */ - } - i__2 = j + j * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; -/* L20: */ - } - - for (i__ = *k; i__ >= 1; --i__) { - -/* Apply H(i) to A(i:m,i:n) from the left */ - - if (i__ < *n) { - i__1 = i__ + i__ * a_dim1; - a[i__1].r = 1.f, a[i__1].i = 0.f; - i__1 = *m - i__ + 1; - i__2 = *n - i__; - clarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - } - if (i__ < *m) { - i__1 = *m - i__; - i__2 = i__; - q__1.r = -tau[i__2].r, q__1.i = 
-tau[i__2].i; - cscal_(&i__1, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1); - } - i__1 = i__ + i__ * a_dim1; - i__2 = i__; - q__1.r = 1.f - tau[i__2].r, q__1.i = 0.f - tau[i__2].i; - a[i__1].r = q__1.r, a[i__1].i = q__1.i; - -/* Set A(1:i-1,i) to zero */ - - i__1 = i__ - 1; - for (l = 1; l <= i__1; ++l) { - i__2 = l + i__ * a_dim1; - a[i__2].r = 0.f, a[i__2].i = 0.f; -/* L30: */ - } -/* L40: */ - } - return 0; - -/* End of CUNG2R */ - -} /* cung2r_ */ - -/* Subroutine */ int cungbr_(char *vect, integer *m, integer *n, integer *k, - complex *a, integer *lda, complex *tau, complex *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, nb, mn; - extern logical lsame_(char *, char *); - static integer iinfo; - static logical wantq; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int cunglq_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *, integer *), - cungqr_(integer *, integer *, integer *, complex *, integer *, - complex *, complex *, integer *, integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNGBR generates one of the complex unitary matrices Q or P**H - determined by CGEBRD when reducing a complex matrix A to bidiagonal - form: A = Q * B * P**H. Q and P**H are defined as products of - elementary reflectors H(i) or G(i) respectively. - - If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q - is of order M: - if m >= k, Q = H(1) H(2) . . . 
H(k) and CUNGBR returns the first n - columns of Q, where m >= n >= k; - if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an - M-by-M matrix. - - If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H - is of order N: - if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m - rows of P**H, where n >= m >= k; - if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as - an N-by-N matrix. - - Arguments - ========= - - VECT (input) CHARACTER*1 - Specifies whether the matrix Q or the matrix P**H is - required, as defined in the transformation applied by CGEBRD: - = 'Q': generate Q; - = 'P': generate P**H. - - M (input) INTEGER - The number of rows of the matrix Q or P**H to be returned. - M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q or P**H to be returned. - N >= 0. - If VECT = 'Q', M >= N >= min(M,K); - if VECT = 'P', N >= M >= min(N,K). - - K (input) INTEGER - If VECT = 'Q', the number of columns in the original M-by-K - matrix reduced by CGEBRD. - If VECT = 'P', the number of rows in the original K-by-N - matrix reduced by CGEBRD. - K >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the vectors which define the elementary reflectors, - as returned by CGEBRD. - On exit, the M-by-N matrix Q or P**H. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= M. - - TAU (input) COMPLEX array, dimension - (min(M,K)) if VECT = 'Q' - (min(N,K)) if VECT = 'P' - TAU(i) must contain the scalar factor of the elementary - reflector H(i) or G(i), which determines Q or P**H, as - returned by CGEBRD in its array argument TAUQ or TAUP. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,min(M,N)). - For optimum performance LWORK >= min(M,N)*NB, where NB - is the optimal blocksize. 
- - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - wantq = lsame_(vect, "Q"); - mn = min(*m,*n); - lquery = *lwork == -1; - if (! wantq && ! lsame_(vect, "P")) { - *info = -1; - } else if (*m < 0) { - *info = -2; - } else if (((*n < 0) || (wantq && ((*n > *m) || (*n < min(*m,*k))))) || (! - wantq && ((*m > *n) || (*m < min(*n,*k))))) { - *info = -3; - } else if (*k < 0) { - *info = -4; - } else if (*lda < max(1,*m)) { - *info = -6; - } else if (*lwork < max(1,mn) && ! 
lquery) { - *info = -9; - } - - if (*info == 0) { - if (wantq) { - nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( - ftnlen)1); - } else { - nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( - ftnlen)1); - } - lwkopt = max(1,mn) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNGBR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if ((*m == 0) || (*n == 0)) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - if (wantq) { - -/* - Form Q, determined by a call to CGEBRD to reduce an m-by-k - matrix -*/ - - if (*m >= *k) { - -/* If m >= k, assume m >= n >= k */ - - cungqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & - iinfo); - - } else { - -/* - If m < k, assume m = n - - Shift the vectors which define the elementary reflectors one - column to the right, and set the first row and column of Q - to those of the unit matrix -*/ - - for (j = *m; j >= 2; --j) { - i__1 = j * a_dim1 + 1; - a[i__1].r = 0.f, a[i__1].i = 0.f; - i__1 = *m; - for (i__ = j + 1; i__ <= i__1; ++i__) { - i__2 = i__ + j * a_dim1; - i__3 = i__ + (j - 1) * a_dim1; - a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; -/* L10: */ - } -/* L20: */ - } - i__1 = a_dim1 + 1; - a[i__1].r = 1.f, a[i__1].i = 0.f; - i__1 = *m; - for (i__ = 2; i__ <= i__1; ++i__) { - i__2 = i__ + a_dim1; - a[i__2].r = 0.f, a[i__2].i = 0.f; -/* L30: */ - } - if (*m > 1) { - -/* Form Q(2:m,2:m) */ - - i__1 = *m - 1; - i__2 = *m - 1; - i__3 = *m - 1; - cungqr_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, & - tau[1], &work[1], lwork, &iinfo); - } - } - } else { - -/* - Form P', determined by a call to CGEBRD to reduce a k-by-n - matrix -*/ - - if (*k < *n) { - -/* If k < n, assume k <= m <= n */ - - cunglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & - iinfo); - - } else { - -/* - If k >= n, assume m = n - - Shift the vectors which define the elementary reflectors 
one - row downward, and set the first row and column of P' to - those of the unit matrix -*/ - - i__1 = a_dim1 + 1; - a[i__1].r = 1.f, a[i__1].i = 0.f; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - i__2 = i__ + a_dim1; - a[i__2].r = 0.f, a[i__2].i = 0.f; -/* L40: */ - } - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - for (i__ = j - 1; i__ >= 2; --i__) { - i__2 = i__ + j * a_dim1; - i__3 = i__ - 1 + j * a_dim1; - a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; -/* L50: */ - } - i__2 = j * a_dim1 + 1; - a[i__2].r = 0.f, a[i__2].i = 0.f; -/* L60: */ - } - if (*n > 1) { - -/* Form P'(2:n,2:n) */ - - i__1 = *n - 1; - i__2 = *n - 1; - i__3 = *n - 1; - cunglq_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, & - tau[1], &work[1], lwork, &iinfo); - } - } - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNGBR */ - -} /* cungbr_ */ - -/* Subroutine */ int cunghr_(integer *n, integer *ilo, integer *ihi, complex * - a, integer *lda, complex *tau, complex *work, integer *lwork, integer - *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, nb, nh, iinfo; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *, integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNGHR generates a complex unitary matrix Q which is defined as the - product of IHI-ILO elementary reflectors of order N, as returned by - CGEHRD: - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). 
- - Arguments - ========= - - N (input) INTEGER - The order of the matrix Q. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - ILO and IHI must have the same values as in the previous call - of CGEHRD. Q is equal to the unit matrix except in the - submatrix Q(ilo+1:ihi,ilo+1:ihi). - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the vectors which define the elementary reflectors, - as returned by CGEHRD. - On exit, the N-by-N unitary matrix Q. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (input) COMPLEX array, dimension (N-1) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEHRD. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= IHI-ILO. - For optimum performance LWORK >= (IHI-ILO)*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nh = *ihi - *ilo; - lquery = *lwork == -1; - if (*n < 0) { - *info = -1; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { - *info = -2; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < max(1,nh) && ! 
lquery) { - *info = -8; - } - - if (*info == 0) { - nb = ilaenv_(&c__1, "CUNGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( - ftnlen)1); - lwkopt = max(1,nh) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNGHR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - -/* - Shift the vectors which define the elementary reflectors one - column to the right, and set the first ilo and the last n-ihi - rows and columns to those of the unit matrix -*/ - - i__1 = *ilo + 1; - for (j = *ihi; j >= i__1; --j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L10: */ - } - i__2 = *ihi; - for (i__ = j + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - i__4 = i__ + (j - 1) * a_dim1; - a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i; -/* L20: */ - } - i__2 = *n; - for (i__ = *ihi + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L30: */ - } -/* L40: */ - } - i__1 = *ilo; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L50: */ - } - i__2 = j + j * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; -/* L60: */ - } - i__1 = *n; - for (j = *ihi + 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L70: */ - } - i__2 = j + j * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; -/* L80: */ - } - - if (nh > 0) { - -/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ - - cungqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* - ilo], &work[1], lwork, &iinfo); - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNGHR */ - -} /* cunghr_ */ - -/* Subroutine */ int cungl2_(integer *m, integer *n, integer *k, 
complex *a, - integer *lda, complex *tau, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - complex q__1, q__2; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), clarf_(char *, integer *, integer *, complex *, - integer *, complex *, complex *, integer *, complex *), - clacgv_(integer *, complex *, integer *), xerbla_(char *, integer - *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, - which is defined as the first m rows of a product of k elementary - reflectors of order n - - Q = H(k)' . . . H(2)' H(1)' - - as returned by CGELQF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. N >= M. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. M >= K >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the i-th row must contain the vector which defines - the elementary reflector H(i), for i = 1,2,...,k, as returned - by CGELQF in the first k rows of its array argument A. - On exit, the m by n matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGELQF. 
- - WORK (workspace) COMPLEX array, dimension (M) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < *m) { - *info = -2; - } else if ((*k < 0) || (*k > *m)) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNGL2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m <= 0) { - return 0; - } - - if (*k < *m) { - -/* Initialise rows k+1:m to rows of the unit matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (l = *k + 1; l <= i__2; ++l) { - i__3 = l + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L10: */ - } - if (j > *k && j <= *m) { - i__2 = j + j * a_dim1; - a[i__2].r = 1.f, a[i__2].i = 0.f; - } -/* L20: */ - } - } - - for (i__ = *k; i__ >= 1; --i__) { - -/* Apply H(i)' to A(i:m,i:n) from the right */ - - if (i__ < *n) { - i__1 = *n - i__; - clacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); - if (i__ < *m) { - i__1 = i__ + i__ * a_dim1; - a[i__1].r = 1.f, a[i__1].i = 0.f; - i__1 = *m - i__; - i__2 = *n - i__ + 1; - r_cnjg(&q__1, &tau[i__]); - clarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & - q__1, &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); - } - i__1 = *n - i__; - i__2 = i__; - q__1.r = -tau[i__2].r, q__1.i = -tau[i__2].i; - cscal_(&i__1, &q__1, &a[i__ + (i__ + 1) * a_dim1], lda); - i__1 = *n - i__; - clacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); - } - i__1 = i__ + i__ * a_dim1; - r_cnjg(&q__2, &tau[i__]); - q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; - a[i__1].r = q__1.r, a[i__1].i = q__1.i; - -/* Set A(i,1:i-1,i) to zero */ - - i__1 = i__ - 1; - for (l = 1; l <= i__1; ++l) 
{ - i__2 = i__ + l * a_dim1; - a[i__2].r = 0.f, a[i__2].i = 0.f; -/* L30: */ - } -/* L40: */ - } - return 0; - -/* End of CUNGL2 */ - -} /* cungl2_ */ - -/* Subroutine */ int cunglq_(integer *m, integer *n, integer *k, complex *a, - integer *lda, complex *tau, complex *work, integer *lwork, integer * - info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int cungl2_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *), clarfb_( - char *, char *, char *, char *, integer *, integer *, integer *, - complex *, integer *, complex *, integer *, complex *, integer *, - complex *, integer *), clarft_( - char *, char *, integer *, integer *, complex *, integer *, - complex *, complex *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNGLQ generates an M-by-N complex matrix Q with orthonormal rows, - which is defined as the first M rows of a product of K elementary - reflectors of order N - - Q = H(k)' . . . H(2)' H(1)' - - as returned by CGELQF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. N >= M. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. M >= K >= 0. 
- - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the i-th row must contain the vector which defines - the elementary reflector H(i), for i = 1,2,...,k, as returned - by CGELQF in the first k rows of its array argument A. - On exit, the M-by-N matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGELQF. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,M). - For optimum performance LWORK >= M*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit; - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); - lwkopt = max(1,*m) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < *m) { - *info = -2; - } else if ((*k < 0) || (*k > *m)) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*lwork < max(1,*m) && ! 
lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNGLQ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m <= 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *m; - if (nb > 1 && nb < *k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "CUNGLQ", " ", m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *m; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "CUNGLQ", " ", m, n, k, &c_n1, - (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < *k && nx < *k) { - -/* - Use blocked code after the last block. - The first kk rows are handled by the block method. -*/ - - ki = (*k - nx - 1) / nb * nb; -/* Computing MIN */ - i__1 = *k, i__2 = ki + nb; - kk = min(i__1,i__2); - -/* Set A(kk+1:m,1:kk) to zero. */ - - i__1 = kk; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = kk + 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - kk = 0; - } - -/* Use unblocked code for the last or only block. */ - - if (kk < *m) { - i__1 = *m - kk; - i__2 = *n - kk; - i__3 = *k - kk; - cungl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & - tau[kk + 1], &work[1], &iinfo); - } - - if (kk > 0) { - -/* Use blocked code */ - - i__1 = -nb; - for (i__ = ki + 1; i__1 < 0 ? 
i__ >= 1 : i__ <= 1; i__ += i__1) { -/* Computing MIN */ - i__2 = nb, i__3 = *k - i__ + 1; - ib = min(i__2,i__3); - if (i__ + ib <= *m) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__2 = *n - i__ + 1; - clarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H' to A(i+ib:m,i:n) from the right */ - - i__2 = *m - i__ - ib + 1; - i__3 = *n - i__ + 1; - clarfb_("Right", "Conjugate transpose", "Forward", "Rowwise", - &i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ - 1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ - ib + 1], &ldwork); - } - -/* Apply H' to columns i:n of current block */ - - i__2 = *n - i__ + 1; - cungl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & - work[1], &iinfo); - -/* Set columns 1:i-1 of current block to zero */ - - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - i__3 = i__ + ib - 1; - for (l = i__; l <= i__3; ++l) { - i__4 = l + j * a_dim1; - a[i__4].r = 0.f, a[i__4].i = 0.f; -/* L30: */ - } -/* L40: */ - } -/* L50: */ - } - } - - work[1].r = (real) iws, work[1].i = 0.f; - return 0; - -/* End of CUNGLQ */ - -} /* cunglq_ */ - -/* Subroutine */ int cungqr_(integer *m, integer *n, integer *k, complex *a, - integer *lda, complex *tau, complex *work, integer *lwork, integer * - info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int cung2r_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *), clarfb_( - char *, char *, char *, char *, integer *, integer *, integer *, - complex *, integer *, complex *, integer *, complex *, integer *, - complex *, integer *), clarft_( - char *, char *, integer *, integer *, complex *, integer *, - complex *, complex *, integer *), xerbla_(char *, - integer *); - extern integer 
ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNGQR generates an M-by-N complex matrix Q with orthonormal columns, - which is defined as the first N columns of a product of K elementary - reflectors of order M - - Q = H(1) H(2) . . . H(k) - - as returned by CGEQRF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. M >= N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. N >= K >= 0. - - A (input/output) COMPLEX array, dimension (LDA,N) - On entry, the i-th column must contain the vector which - defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by CGEQRF in the first k columns of its array - argument A. - On exit, the M-by-N matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEQRF. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. 
- - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); - lwkopt = max(1,*n) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if ((*n < 0) || (*n > *m)) { - *info = -2; - } else if ((*k < 0) || (*k > *n)) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNGQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *n; - if (nb > 1 && nb < *k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "CUNGQR", " ", m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "CUNGQR", " ", m, n, k, &c_n1, - (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < *k && nx < *k) { - -/* - Use blocked code after the last block. - The first kk columns are handled by the block method. -*/ - - ki = (*k - nx - 1) / nb * nb; -/* Computing MIN */ - i__1 = *k, i__2 = ki + nb; - kk = min(i__1,i__2); - -/* Set A(1:kk,kk+1:n) to zero. 
*/ - - i__1 = *n; - for (j = kk + 1; j <= i__1; ++j) { - i__2 = kk; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * a_dim1; - a[i__3].r = 0.f, a[i__3].i = 0.f; -/* L10: */ - } -/* L20: */ - } - } else { - kk = 0; - } - -/* Use unblocked code for the last or only block. */ - - if (kk < *n) { - i__1 = *m - kk; - i__2 = *n - kk; - i__3 = *k - kk; - cung2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & - tau[kk + 1], &work[1], &iinfo); - } - - if (kk > 0) { - -/* Use blocked code */ - - i__1 = -nb; - for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { -/* Computing MIN */ - i__2 = nb, i__3 = *k - i__ + 1; - ib = min(i__2,i__3); - if (i__ + ib <= *n) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__2 = *m - i__ + 1; - clarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H to A(i:m,i+ib:n) from the left */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__ - ib + 1; - clarfb_("Left", "No transpose", "Forward", "Columnwise", & - i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ - 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & - work[ib + 1], &ldwork); - } - -/* Apply H to rows i:m of current block */ - - i__2 = *m - i__ + 1; - cung2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & - work[1], &iinfo); - -/* Set rows 1:i-1 of current block to zero */ - - i__2 = i__ + ib - 1; - for (j = i__; j <= i__2; ++j) { - i__3 = i__ - 1; - for (l = 1; l <= i__3; ++l) { - i__4 = l + j * a_dim1; - a[i__4].r = 0.f, a[i__4].i = 0.f; -/* L30: */ - } -/* L40: */ - } -/* L50: */ - } - } - - work[1].r = (real) iws, work[1].i = 0.f; - return 0; - -/* End of CUNGQR */ - -} /* cungqr_ */ - -/* Subroutine */ int cunm2l_(char *side, char *trans, integer *m, integer *n, - integer *k, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *info) -{ - /* System generated locals */ - integer 
a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; - complex q__1; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, i1, i2, i3, mi, ni, nq; - static complex aii; - static logical left; - static complex taui; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CUNM2L overwrites the general complex m-by-n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'C', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'C', - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by CGEQLF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'C': apply Q' (Conjugate transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - CGEQLF in the last k columns of its array argument A. 
- A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEQLF. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the m-by-n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) COMPLEX array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if ((*k < 0) || (*k > nq)) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNM2L", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - return 0; - } - - if ((left && notran) || (! left && ! notran)) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - } else { - mi = *m; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) or H(i)' is applied to C(1:m-k+i,1:n) */ - - mi = *m - *k + i__; - } else { - -/* H(i) or H(i)' is applied to C(1:m,1:n-k+i) */ - - ni = *n - *k + i__; - } - -/* Apply H(i) or H(i)' */ - - if (notran) { - i__3 = i__; - taui.r = tau[i__3].r, taui.i = tau[i__3].i; - } else { - r_cnjg(&q__1, &tau[i__]); - taui.r = q__1.r, taui.i = q__1.i; - } - i__3 = nq - *k + i__ + i__ * a_dim1; - aii.r = a[i__3].r, aii.i = a[i__3].i; - i__3 = nq - *k + i__ + i__ * a_dim1; - a[i__3].r = 1.f, a[i__3].i = 0.f; - clarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &taui, &c__[ - c_offset], ldc, &work[1]); - i__3 = nq - *k + i__ + i__ * a_dim1; - a[i__3].r = aii.r, a[i__3].i = aii.i; -/* L10: */ - } - return 0; - -/* End of CUNM2L */ - -} /* cunm2l_ */ - -/* Subroutine */ int cunm2r_(char *side, char *trans, integer *m, integer *n, - integer *k, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; - complex q__1; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static complex aii; - static logical left; - static complex taui; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CUNM2R overwrites the general complex m-by-n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'C', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'C', - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(1) H(2) . . . H(k) - - as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'C': apply Q' (Conjugate transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - CGEQRF in the first k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEQRF. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the m-by-n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). 
- - WORK (workspace) COMPLEX array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if ((*k < 0) || (*k > nq)) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNM2R", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - return 0; - } - - if ((left && ! notran) || (! left && notran)) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) or H(i)' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H(i) or H(i)' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H(i) or H(i)' */ - - if (notran) { - i__3 = i__; - taui.r = tau[i__3].r, taui.i = tau[i__3].i; - } else { - r_cnjg(&q__1, &tau[i__]); - taui.r = q__1.r, taui.i = q__1.i; - } - i__3 = i__ + i__ * a_dim1; - aii.r = a[i__3].r, aii.i = a[i__3].i; - i__3 = i__ + i__ * a_dim1; - a[i__3].r = 1.f, a[i__3].i = 0.f; - clarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &taui, &c__[ic - + jc * c_dim1], ldc, &work[1]); - i__3 = i__ + i__ * a_dim1; - a[i__3].r = aii.r, a[i__3].i = aii.i; -/* L10: */ - } - return 0; - -/* End of CUNM2R */ - -} /* cunm2r_ */ - -/* Subroutine */ int cunmbr_(char *vect, char *side, char *trans, integer *m, - integer *n, integer *k, complex *a, integer *lda, complex *tau, - complex *c__, integer *ldc, complex *work, integer *lwork, integer * - info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i1, i2, nb, mi, ni, nq, nw; - static logical left; - extern logical lsame_(char *, char *); - static integer iinfo; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int cunmlq_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *); - static logical notran; - extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *); - static 
logical applyq; - static char transt[1]; - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - If VECT = 'Q', CUNMBR overwrites the general complex M-by-N matrix C - with - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - If VECT = 'P', CUNMBR overwrites the general complex M-by-N matrix C - with - SIDE = 'L' SIDE = 'R' - TRANS = 'N': P * C C * P - TRANS = 'C': P**H * C C * P**H - - Here Q and P**H are the unitary matrices determined by CGEBRD when - reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q - and P**H are defined as products of elementary reflectors H(i) and - G(i) respectively. - - Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the - order of the unitary matrix Q or P**H that is applied. - - If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: - if nq >= k, Q = H(1) H(2) . . . H(k); - if nq < k, Q = H(1) H(2) . . . H(nq-1). - - If VECT = 'P', A is assumed to have been a K-by-NQ matrix: - if k < nq, P = G(1) G(2) . . . G(k); - if k >= nq, P = G(1) G(2) . . . G(nq-1). - - Arguments - ========= - - VECT (input) CHARACTER*1 - = 'Q': apply Q or Q**H; - = 'P': apply P or P**H. - - SIDE (input) CHARACTER*1 - = 'L': apply Q, Q**H, P or P**H from the Left; - = 'R': apply Q, Q**H, P or P**H from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q or P; - = 'C': Conjugate transpose, apply Q**H or P**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - If VECT = 'Q', the number of columns in the original - matrix reduced by CGEBRD. - If VECT = 'P', the number of rows in the original - matrix reduced by CGEBRD. - K >= 0. 
- - A (input) COMPLEX array, dimension - (LDA,min(nq,K)) if VECT = 'Q' - (LDA,nq) if VECT = 'P' - The vectors which define the elementary reflectors H(i) and - G(i), whose products determine the matrices Q and P, as - returned by CGEBRD. - - LDA (input) INTEGER - The leading dimension of the array A. - If VECT = 'Q', LDA >= max(1,nq); - if VECT = 'P', LDA >= max(1,min(nq,K)). - - TAU (input) COMPLEX array, dimension (min(nq,K)) - TAU(i) must contain the scalar factor of the elementary - reflector H(i) or G(i) which determines Q or P, as returned - by CGEBRD in the array argument TAUQ or TAUP. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q - or P*C or P**H*C or C*P or C*P**H. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. 
- - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - applyq = lsame_(vect, "Q"); - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! applyq && ! lsame_(vect, "P")) { - *info = -1; - } else if (! left && ! lsame_(side, "R")) { - *info = -2; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -3; - } else if (*m < 0) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*k < 0) { - *info = -6; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = 1, i__2 = min(nq,*k); - if ((applyq && *lda < max(1,nq)) || (! applyq && *lda < max(i__1,i__2) - )) { - *info = -8; - } else if (*ldc < max(1,*m)) { - *info = -11; - } else if (*lwork < max(1,nw) && ! 
lquery) { - *info = -13; - } - } - - if (*info == 0) { - if (applyq) { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "CUNMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "CUNMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } - lwkopt = max(1,nw) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNMBR", &i__1); - return 0; - } else if (lquery) { - } - -/* Quick return if possible */ - - work[1].r = 1.f, work[1].i = 0.f; - if ((*m == 0) || (*n == 0)) { - return 0; - } - - if (applyq) { - -/* Apply Q */ - - if (nq >= *k) { - -/* Q was determined by a call to CGEBRD with nq >= k */ - - cunmqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], lwork, &iinfo); - } else if (nq > 1) { - -/* Q was determined by a call to CGEBRD with nq < k */ - - if (left) { - mi = *m - 1; - ni = *n; - i1 = 2; - i2 = 1; - } else { - mi = *m; - ni = *n - 1; - i1 = 1; - i2 = 2; - } - i__1 = nq - 1; - cunmqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], 
lda, &tau[1] - , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); - } - } else { - -/* Apply P */ - - if (notran) { - *(unsigned char *)transt = 'C'; - } else { - *(unsigned char *)transt = 'N'; - } - if (nq > *k) { - -/* P was determined by a call to CGEBRD with nq > k */ - - cunmlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], lwork, &iinfo); - } else if (nq > 1) { - -/* P was determined by a call to CGEBRD with nq <= k */ - - if (left) { - mi = *m - 1; - ni = *n; - i1 = 2; - i2 = 1; - } else { - mi = *m; - ni = *n - 1; - i1 = 1; - i2 = 2; - } - i__1 = nq - 1; - cunmlq_(side, transt, &mi, &ni, &i__1, &a[((a_dim1) << (1)) + 1], - lda, &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], - lwork, &iinfo); - } - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNMBR */ - -} /* cunmbr_ */ - -/* Subroutine */ int cunml2_(char *side, char *trans, integer *m, integer *n, - integer *k, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; - complex q__1; - - /* Builtin functions */ - void r_cnjg(complex *, complex *); - - /* Local variables */ - static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static complex aii; - static logical left; - static complex taui; - extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * - , integer *, complex *, complex *, integer *, complex *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), - xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - CUNML2 overwrites the general complex m-by-n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'C', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'C', - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(k)' . . . H(2)' H(1)' - - as returned by CGELQF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'C': apply Q' (Conjugate transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX array, dimension - (LDA,M) if SIDE = 'L', - (LDA,N) if SIDE = 'R' - The i-th row must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - CGELQF in the first k rows of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,K). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGELQF. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the m-by-n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). 
- - WORK (workspace) COMPLEX array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if ((*k < 0) || (*k > nq)) { - *info = -5; - } else if (*lda < max(1,*k)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNML2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - return 0; - } - - if ((left && notran) || (! left && ! notran)) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) or H(i)' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H(i) or H(i)' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H(i) or H(i)' */ - - if (notran) { - r_cnjg(&q__1, &tau[i__]); - taui.r = q__1.r, taui.i = q__1.i; - } else { - i__3 = i__; - taui.r = tau[i__3].r, taui.i = tau[i__3].i; - } - if (i__ < nq) { - i__3 = nq - i__; - clacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); - } - i__3 = i__ + i__ * a_dim1; - aii.r = a[i__3].r, aii.i = a[i__3].i; - i__3 = i__ + i__ * a_dim1; - a[i__3].r = 1.f, a[i__3].i = 0.f; - clarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &taui, &c__[ic + - jc * c_dim1], ldc, &work[1]); - i__3 = i__ + i__ * a_dim1; - a[i__3].r = aii.r, a[i__3].i = aii.i; - if (i__ < nq) { - i__3 = nq - i__; - clacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); - } -/* L10: */ - } - return 0; - -/* End of CUNML2 */ - -} /* cunml2_ */ - -/* Subroutine */ int cunmlq_(char *side, char *trans, integer *m, integer *n, - integer *k, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static complex t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int cunml2_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *), clarfb_(char *, char *, - char *, char *, integer *, integer *, integer *, complex *, - integer *, complex *, integer *, 
complex *, integer *, complex *, - integer *), clarft_(char *, char * - , integer *, integer *, complex *, integer *, complex *, complex * - , integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical notran; - static integer ldwork; - static char transt[1]; - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNMLQ overwrites the general complex M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(k)' . . . H(2)' H(1)' - - as returned by CGELQF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**H from the Left; - = 'R': apply Q or Q**H from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'C': Conjugate transpose, apply Q**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX array, dimension - (LDA,M) if SIDE = 'L', - (LDA,N) if SIDE = 'R' - The i-th row must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - CGELQF in the first k rows of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,K). 
- - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGELQF. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! 
lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if ((*k < 0) || (*k > nq)) { - *info = -5; - } else if (*lda < max(1,*k)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMLQ", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNMLQ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMLQ", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if ((nb < nbmin) || (nb >= *k)) { - -/* Use unblocked code */ - - cunml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if ((left && notran) || (! left && ! 
notran)) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - if (notran) { - *(unsigned char *)transt = 'C'; - } else { - *(unsigned char *)transt = 'N'; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__4 = nq - i__ + 1; - clarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], - lda, &tau[i__], t, &c__65); - if (left) { - -/* H or H' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H or H' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H or H' */ - - clarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ - + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], - ldc, &work[1], &ldwork); -/* L10: */ - } - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNMLQ */ - -} /* cunmlq_ */ - -/* Subroutine */ int cunmql_(char *side, char *trans, integer *m, integer *n, - integer *k, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static complex t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int cunm2l_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, 
complex *, integer *, - complex *, integer *), clarfb_(char *, char *, - char *, char *, integer *, integer *, integer *, complex *, - integer *, complex *, integer *, complex *, integer *, complex *, - integer *), clarft_(char *, char * - , integer *, integer *, complex *, integer *, complex *, complex * - , integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical notran; - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNMQL overwrites the general complex M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by CGEQLF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**H from the Left; - = 'R': apply Q or Q**H from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'C': Transpose, apply Q**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - CGEQLF in the last k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. 
- If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEQLF. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! 
lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if ((*k < 0) || (*k > nq)) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. - - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNMQL", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if ((nb < nbmin) || (nb >= *k)) { - -/* Use unblocked code */ - - cunm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if ((left && notran) || (! left && ! notran)) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - } else { - mi = *m; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i+ib-1) . . . H(i+1) H(i) -*/ - - i__4 = nq - *k + i__ + ib - 1; - clarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] - , lda, &tau[i__], t, &c__65); - if (left) { - -/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ - - mi = *m - *k + i__ + ib - 1; - } else { - -/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ - - ni = *n - *k + i__ + ib - 1; - } - -/* Apply H or H' */ - - clarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ - i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & - work[1], &ldwork); -/* L10: */ - } - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNMQL */ - -} /* cunmql_ */ - -/* Subroutine */ int cunmqr_(char *side, char *trans, integer *m, integer *n, - integer *k, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static complex t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int cunm2r_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *), clarfb_(char *, char *, - char *, char *, integer *, integer *, integer *, complex *, - integer *, complex *, integer *, complex *, integer *, complex *, - integer *), clarft_(char *, char * - , integer *, integer *, complex *, integer *, complex *, complex * - , 
integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical notran; - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNMQR overwrites the general complex M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - where Q is a complex unitary matrix defined as the product of k - elementary reflectors - - Q = H(1) H(2) . . . H(k) - - as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**H from the Left; - = 'R': apply Q or Q**H from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'C': Conjugate transpose, apply Q**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) COMPLEX array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - CGEQRF in the first k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) COMPLEX array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CGEQRF. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. 
- On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "C")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if ((*k < 0) || (*k > nq)) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. 
- - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQR", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("CUNMQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQR", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if ((nb < nbmin) || (nb >= *k)) { - -/* Use unblocked code */ - - cunm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if ((left && ! notran) || (! left && notran)) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . 
H(i+ib-1) -*/ - - i__4 = nq - i__ + 1; - clarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], t, &c__65) - ; - if (left) { - -/* H or H' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H or H' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H or H' */ - - clarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ - i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * - c_dim1], ldc, &work[1], &ldwork); -/* L10: */ - } - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNMQR */ - -} /* cunmqr_ */ - -/* Subroutine */ int cunmtr_(char *side, char *uplo, char *trans, integer *m, - integer *n, complex *a, integer *lda, complex *tau, complex *c__, - integer *ldc, complex *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i1, i2, nb, mi, ni, nq, nw; - static logical left; - extern logical lsame_(char *, char *); - static integer iinfo; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int cunmql_(char *, char *, integer *, integer *, - integer *, complex *, integer *, complex *, complex *, integer *, - complex *, integer *, integer *), cunmqr_(char *, - char *, integer *, integer *, integer *, complex *, integer *, - complex *, complex *, integer *, complex *, integer *, integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CUNMTR overwrites the general complex M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'C': Q**H * C C * Q**H - - where Q is a complex unitary matrix of order nq, with nq = m if - SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of - nq-1 elementary reflectors, as returned by CHETRD: - - if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); - - if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**H from the Left; - = 'R': apply Q or Q**H from the Right. - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A contains elementary reflectors - from CHETRD; - = 'L': Lower triangle of A contains elementary reflectors - from CHETRD. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'C': Conjugate transpose, apply Q**H. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - A (input) COMPLEX array, dimension - (LDA,M) if SIDE = 'L' - (LDA,N) if SIDE = 'R' - The vectors which define the elementary reflectors, as - returned by CHETRD. - - LDA (input) INTEGER - The leading dimension of the array A. - LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. - - TAU (input) COMPLEX array, dimension - (M-1) if SIDE = 'L' - (N-1) if SIDE = 'R' - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by CHETRD. - - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
- - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >=M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - upper = lsame_(uplo, "U"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! upper && ! lsame_(uplo, "L")) { - *info = -2; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "C")) { - *info = -3; - } else if (*m < 0) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! 
lquery) { - *info = -12; - } - - if (*info == 0) { - if (upper) { - if (left) { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *m - 1; - i__3 = *m - 1; - nb = ilaenv_(&c__1, "CUNMQL", ch__1, &i__2, n, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *n - 1; - i__3 = *n - 1; - nb = ilaenv_(&c__1, "CUNMQL", ch__1, m, &i__2, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *m - 1; - i__3 = *m - 1; - nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__2, n, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *n - 1; - i__3 = *n - 1; - nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &i__2, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } - lwkopt = max(1,nw) * nb; - work[1].r = (real) lwkopt, work[1].i = 0.f; - } - - if (*info != 0) { - i__2 = -(*info); - xerbla_("CUNMTR", &i__2); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (nq == 1)) { - work[1].r = 1.f, work[1].i = 0.f; - return 0; - } - - if (left) { - mi = *m - 1; - ni = *n; - } else { - mi = *m; - ni = *n - 1; - } - - if (upper) { - -/* Q was determined by a call to CHETRD with UPLO = 'U' */ - - i__2 = nq - 1; - cunmql_(side, trans, &mi, &ni, &i__2, &a[((a_dim1) << (1)) + 1], lda, - &tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); - } else { - -/* Q was determined by a call to CHETRD with UPLO = 'L' */ - - if (left) { - i1 = 2; - i2 = 1; - } else { - i1 = 1; - i2 = 2; - } - i__2 = nq - 1; - 
cunmqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & - c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); - } - work[1].r = (real) lwkopt, work[1].i = 0.f; - return 0; - -/* End of CUNMTR */ - -} /* cunmtr_ */ - -/* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal * - d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, - integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * - iwork, integer *info) -{ - /* System generated locals */ - integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double d_sign(doublereal *, doublereal *), log(doublereal); - - /* Local variables */ - static integer i__, j, k; - static doublereal p, r__; - static integer z__, ic, ii, kk; - static doublereal cs; - static integer is, iu; - static doublereal sn; - static integer nm1; - static doublereal eps; - static integer ivt, difl, difr, ierr, perm, mlvl, sqre; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer * - , doublereal *, integer *), dswap_(integer *, doublereal *, - integer *, doublereal *, integer *); - static integer poles, iuplo, nsize, start; - extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - integer *, integer *, doublereal *, integer *); - - extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *), dlascl_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, integer *, doublereal *, integer *, - 
integer *), dlasdq_(char *, integer *, integer *, integer - *, integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *), dlaset_(char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *); - static integer givcol; - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - static integer icompq; - static doublereal orgnrm; - static integer givnum, givptr, qstart, smlsiz, wstart, smlszp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 - - - Purpose - ======= - - DBDSDC computes the singular value decomposition (SVD) of a real - N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, - using a divide and conquer method, where S is a diagonal matrix - with non-negative diagonal elements (the singular values of B), and - U and VT are orthogonal matrices of left and right singular vectors, - respectively. DBDSDC can be used to compute all singular values, - and optionally, singular vectors or singular vectors in compact form. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See DLASD3 for details. - - The code currently call DLASDQ if singular values only are desired. 
- However, it can be slightly modified to compute singular values - using the divide and conquer method. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': B is upper bidiagonal. - = 'L': B is lower bidiagonal. - - COMPQ (input) CHARACTER*1 - Specifies whether singular vectors are to be computed - as follows: - = 'N': Compute singular values only; - = 'P': Compute singular values and compute singular - vectors in compact form; - = 'I': Compute singular values and singular vectors. - - N (input) INTEGER - The order of the matrix B. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the n diagonal elements of the bidiagonal matrix B. - On exit, if INFO=0, the singular values of B. - - E (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the elements of E contain the offdiagonal - elements of the bidiagonal matrix whose SVD is desired. - On exit, E has been destroyed. - - U (output) DOUBLE PRECISION array, dimension (LDU,N) - If COMPQ = 'I', then: - On exit, if INFO = 0, U contains the left singular vectors - of the bidiagonal matrix. - For other values of COMPQ, U is not referenced. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= 1. - If singular vectors are desired, then LDU >= max( 1, N ). - - VT (output) DOUBLE PRECISION array, dimension (LDVT,N) - If COMPQ = 'I', then: - On exit, if INFO = 0, VT' contains the right singular - vectors of the bidiagonal matrix. - For other values of COMPQ, VT is not referenced. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= 1. - If singular vectors are desired, then LDVT >= max( 1, N ). - - Q (output) DOUBLE PRECISION array, dimension (LDQ) - If COMPQ = 'P', then: - On exit, if INFO = 0, Q and IQ contain the left - and right singular vectors in a compact form, - requiring O(N log N) space instead of 2*N**2. 
- In particular, Q contains all the DOUBLE PRECISION data in - LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) - words of memory, where SMLSIZ is returned by ILAENV and - is equal to the maximum size of the subproblems at the - bottom of the computation tree (usually about 25). - For other values of COMPQ, Q is not referenced. - - IQ (output) INTEGER array, dimension (LDIQ) - If COMPQ = 'P', then: - On exit, if INFO = 0, Q and IQ contain the left - and right singular vectors in a compact form, - requiring O(N log N) space instead of 2*N**2. - In particular, IQ contains all INTEGER data in - LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) - words of memory, where SMLSIZ is returned by ILAENV and - is equal to the maximum size of the subproblems at the - bottom of the computation tree (usually about 25). - For other values of COMPQ, IQ is not referenced. - - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) - If COMPQ = 'N' then LWORK >= (4 * N). - If COMPQ = 'P' then LWORK >= (6 * N). - If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). - - IWORK (workspace) INTEGER array, dimension (8*N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value. - The update process of divide and conquer failed. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - --e; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --q; - --iq; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - iuplo = 0; - if (lsame_(uplo, "U")) { - iuplo = 1; - } - if (lsame_(uplo, "L")) { - iuplo = 2; - } - if (lsame_(compq, "N")) { - icompq = 0; - } else if (lsame_(compq, "P")) { - icompq = 1; - } else if (lsame_(compq, "I")) { - icompq = 2; - } else { - icompq = -1; - } - if (iuplo == 0) { - *info = -1; - } else if (icompq < 0) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if ((*ldu < 1) || (icompq == 2 && *ldu < *n)) { - *info = -7; - } else if ((*ldvt < 1) || (icompq == 2 && *ldvt < *n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DBDSDC", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - if (*n == 1) { - if (icompq == 1) { - q[1] = d_sign(&c_b2865, &d__[1]); - q[smlsiz * *n + 1] = 1.; - } else if (icompq == 2) { - u[u_dim1 + 1] = d_sign(&c_b2865, &d__[1]); - vt[vt_dim1 + 1] = 1.; - } - d__[1] = abs(d__[1]); - return 0; - } - nm1 = *n - 1; - -/* - If matrix lower bidiagonal, rotate to be upper bidiagonal - by applying Givens rotations on the left -*/ - - wstart = 1; - qstart = 3; - if (icompq == 1) { - dcopy_(n, &d__[1], &c__1, &q[1], &c__1); - i__1 = *n - 1; - dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); - } - if (iuplo == 2) { - qstart = 5; - wstart = ((*n) << (1)) - 1; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (icompq == 1) { - q[i__ + ((*n) << (1))] = cs; - q[i__ + *n * 3] = sn; - } else if (icompq == 2) { - work[i__] = cs; - work[nm1 + i__] = -sn; - } -/* L10: */ - } - } - -/* If 
ICOMPQ = 0, use DLASDQ to compute the singular values. */ - - if (icompq == 0) { - dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ - vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ - wstart], info); - goto L40; - } - -/* - If N is smaller than the minimum divide size SMLSIZ, then solve - the problem with another solver. -*/ - - if (*n <= smlsiz) { - if (icompq == 2) { - dlaset_("A", n, n, &c_b2879, &c_b2865, &u[u_offset], ldu); - dlaset_("A", n, n, &c_b2879, &c_b2865, &vt[vt_offset], ldvt); - dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] - , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ - wstart], info); - } else if (icompq == 1) { - iu = 1; - ivt = iu + *n; - dlaset_("A", n, n, &c_b2879, &c_b2865, &q[iu + (qstart - 1) * *n], - n); - dlaset_("A", n, n, &c_b2879, &c_b2865, &q[ivt + (qstart - 1) * *n] - , n); - dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( - qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ - iu + (qstart - 1) * *n], n, &work[wstart], info); - } - goto L40; - } - - if (icompq == 2) { - dlaset_("A", n, n, &c_b2879, &c_b2865, &u[u_offset], ldu); - dlaset_("A", n, n, &c_b2879, &c_b2865, &vt[vt_offset], ldvt); - } - -/* Scale. */ - - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) 
{ - return 0; - } - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, n, &c__1, &d__[1], n, &ierr); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, &nm1, &c__1, &e[1], &nm1, & - ierr); - - eps = EPSILON; - - mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / - log(2.)) + 1; - smlszp = smlsiz + 1; - - if (icompq == 1) { - iu = 1; - ivt = smlsiz + 1; - difl = ivt + smlszp; - difr = difl + mlvl; - z__ = difr + ((mlvl) << (1)); - ic = z__ + mlvl; - is = ic + 1; - poles = is + 1; - givnum = poles + ((mlvl) << (1)); - - k = 1; - givptr = 2; - perm = 3; - givcol = perm + mlvl; - } - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = d__[i__], abs(d__1)) < eps) { - d__[i__] = d_sign(&eps, &d__[i__]); - } -/* L20: */ - } - - start = 1; - sqre = 0; - - i__1 = nm1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (((d__1 = e[i__], abs(d__1)) < eps) || (i__ == nm1)) { - -/* - Subproblem found. First determine its size and then - apply divide and conquer on it. -*/ - - if (i__ < nm1) { - -/* A subproblem with E(I) small for I < NM1. */ - - nsize = i__ - start + 1; - } else if ((d__1 = e[i__], abs(d__1)) >= eps) { - -/* A subproblem with E(NM1) not too small but I = NM1. */ - - nsize = *n - start + 1; - } else { - -/* - A subproblem with E(NM1) small. This implies an - 1-by-1 subproblem at D(N). Solve this 1-by-1 problem - first. 
-*/ - - nsize = i__ - start + 1; - if (icompq == 2) { - u[*n + *n * u_dim1] = d_sign(&c_b2865, &d__[*n]); - vt[*n + *n * vt_dim1] = 1.; - } else if (icompq == 1) { - q[*n + (qstart - 1) * *n] = d_sign(&c_b2865, &d__[*n]); - q[*n + (smlsiz + qstart - 1) * *n] = 1.; - } - d__[*n] = (d__1 = d__[*n], abs(d__1)); - } - if (icompq == 2) { - dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + - start * u_dim1], ldu, &vt[start + start * vt_dim1], - ldvt, &smlsiz, &iwork[1], &work[wstart], info); - } else { - dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ - start], &q[start + (iu + qstart - 2) * *n], n, &q[ - start + (ivt + qstart - 2) * *n], &iq[start + k * *n], - &q[start + (difl + qstart - 2) * *n], &q[start + ( - difr + qstart - 2) * *n], &q[start + (z__ + qstart - - 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ - start + givptr * *n], &iq[start + givcol * *n], n, & - iq[start + perm * *n], &q[start + (givnum + qstart - - 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ - start + (is + qstart - 2) * *n], &work[wstart], & - iwork[1], info); - if (*info != 0) { - return 0; - } - } - start = i__ + 1; - } -/* L30: */ - } - -/* Unscale */ - - dlascl_("G", &c__0, &c__0, &c_b2865, &orgnrm, n, &c__1, &d__[1], n, &ierr); -L40: - -/* Use Selection Sort to minimize swaps of singular vectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - kk = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] > p) { - kk = j; - p = d__[j]; - } -/* L50: */ - } - if (kk != i__) { - d__[kk] = d__[i__]; - d__[i__] = p; - if (icompq == 1) { - iq[i__] = kk; - } else if (icompq == 2) { - dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & - c__1); - dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); - } - } else if (icompq == 1) { - iq[i__] = i__; - } -/* L60: */ - } - -/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ - - if (icompq == 1) { - if (iuplo == 1) { - iq[*n] = 1; - } else { - iq[*n] = 
0; - } - } - -/* - If B is lower bidiagonal, update U by those Givens rotations - which rotated B to be upper bidiagonal -*/ - - if (iuplo == 2 && icompq == 2) { - dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); - } - - return 0; - -/* End of DBDSDC */ - -} /* dbdsdc_ */ - -/* Subroutine */ int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * - nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, - integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * - ldc, doublereal *work, integer *info) -{ - /* System generated locals */ - integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2; - doublereal d__1, d__2, d__3, d__4; - - /* Builtin functions */ - double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign( - doublereal *, doublereal *); - - /* Local variables */ - static doublereal f, g, h__; - static integer i__, j, m; - static doublereal r__, cs; - static integer ll; - static doublereal sn, mu; - static integer nm1, nm12, nm13, lll; - static doublereal eps, sll, tol, abse; - static integer idir; - static doublereal abss; - static integer oldm; - static doublereal cosl; - static integer isub, iter; - static doublereal unfl, sinl, cosr, smin, smax, sinr; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *), dlas2_( - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *), dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - static doublereal oldcs; - extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *); - static integer oldll; - static doublereal shift, sigmn, oldsn; - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer maxit; - static doublereal sminl, sigmx; - static logical lower; - extern 
/* Subroutine */ int dlasq1_(integer *, doublereal *, doublereal *, - doublereal *, integer *), dlasv2_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *); - - extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *), xerbla_(char *, - integer *); - static doublereal sminoa, thresh; - static logical rotate; - static doublereal sminlo, tolmul; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DBDSQR computes the singular value decomposition (SVD) of a real - N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' - denotes the transpose of P), where S is a diagonal matrix with - non-negative diagonal elements (the singular values of B), and Q - and P are orthogonal matrices. - - The routine computes S, and optionally computes U * Q, P' * VT, - or Q' * C, for given real input matrices U, VT, and C. - - See "Computing Small Singular Values of Bidiagonal Matrices With - Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, - LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, - no. 5, pp. 873-912, Sept 1990) and - "Accurate singular values and differential qd algorithms," by - B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics - Department, University of California at Berkeley, July 1992 - for a detailed description of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': B is upper bidiagonal; - = 'L': B is lower bidiagonal. - - N (input) INTEGER - The order of the matrix B. N >= 0. - - NCVT (input) INTEGER - The number of columns of the matrix VT. NCVT >= 0. - - NRU (input) INTEGER - The number of rows of the matrix U. NRU >= 0. - - NCC (input) INTEGER - The number of columns of the matrix C. NCC >= 0. 
- - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the n diagonal elements of the bidiagonal matrix B. - On exit, if INFO=0, the singular values of B in decreasing - order. - - E (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the elements of E contain the - offdiagonal elements of the bidiagonal matrix whose SVD - is desired. On normal exit (INFO = 0), E is destroyed. - If the algorithm does not converge (INFO > 0), D and E - will contain the diagonal and superdiagonal elements of a - bidiagonal matrix orthogonally equivalent to the one given - as input. E(N) is used for workspace. - - VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) - On entry, an N-by-NCVT matrix VT. - On exit, VT is overwritten by P' * VT. - VT is not referenced if NCVT = 0. - - LDVT (input) INTEGER - The leading dimension of the array VT. - LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. - - U (input/output) DOUBLE PRECISION array, dimension (LDU, N) - On entry, an NRU-by-N matrix U. - On exit, U is overwritten by U * Q. - U is not referenced if NRU = 0. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= max(1,NRU). - - C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) - On entry, an N-by-NCC matrix C. - On exit, C is overwritten by Q' * C. - C is not referenced if NCC = 0. - - LDC (input) INTEGER - The leading dimension of the array C. - LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. - - WORK (workspace) DOUBLE PRECISION array, dimension (4*N) - - INFO (output) INTEGER - = 0: successful exit - < 0: If INFO = -i, the i-th argument had an illegal value - > 0: the algorithm did not converge; D and E contain the - elements of a bidiagonal matrix which is orthogonally - similar to the input matrix B; if INFO = i, i - elements of E have not converged to zero. 
- - Internal Parameters - =================== - - TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) - TOLMUL controls the convergence criterion of the QR loop. - If it is positive, TOLMUL*EPS is the desired relative - precision in the computed singular values. - If it is negative, abs(TOLMUL*EPS*sigma_max) is the - desired absolute accuracy in the computed singular - values (corresponds to relative accuracy - abs(TOLMUL*EPS) in the largest singular value. - abs(TOLMUL) should be between 1 and 1/EPS, and preferably - between 10 (for fast convergence) and .1/EPS - (for there to be some accuracy in the results). - Default is to lose at either one eighth or 2 of the - available decimal digits in each computed singular value - (whichever is smaller). - - MAXITR INTEGER, default = 6 - MAXITR controls the maximum number of passes of the - algorithm through its inner loop. The algorithms stops - (and so fails to converge) if the number of passes - through the inner loop exceeds MAXITR*N**2. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - lower = lsame_(uplo, "L"); - if (! lsame_(uplo, "U") && ! 
lower) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ncvt < 0) { - *info = -3; - } else if (*nru < 0) { - *info = -4; - } else if (*ncc < 0) { - *info = -5; - } else if ((*ncvt == 0 && *ldvt < 1) || (*ncvt > 0 && *ldvt < max(1,*n))) - { - *info = -9; - } else if (*ldu < max(1,*nru)) { - *info = -11; - } else if ((*ncc == 0 && *ldc < 1) || (*ncc > 0 && *ldc < max(1,*n))) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DBDSQR", &i__1); - return 0; - } - if (*n == 0) { - return 0; - } - if (*n == 1) { - goto L160; - } - -/* ROTATE is true if any singular vectors desired, false otherwise */ - - rotate = ((*ncvt > 0) || (*nru > 0)) || (*ncc > 0); - -/* If no singular vectors desired, use qd algorithm */ - - if (! rotate) { - dlasq1_(n, &d__[1], &e[1], &work[1], info); - return 0; - } - - nm1 = *n - 1; - nm12 = nm1 + nm1; - nm13 = nm12 + nm1; - idir = 0; - -/* Get machine constants */ - - eps = EPSILON; - unfl = SAFEMINIMUM; - -/* - If matrix lower bidiagonal, rotate to be upper bidiagonal - by applying Givens rotations on the left -*/ - - if (lower) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - work[i__] = cs; - work[nm1 + i__] = sn; -/* L10: */ - } - -/* Update singular vectors if desired */ - - if (*nru > 0) { - dlasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], - ldu); - } - if (*ncc > 0) { - dlasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], - ldc); - } - } - -/* - Compute singular values to relative accuracy TOL - (By setting TOL to be negative, algorithm will compute - singular values to absolute accuracy ABS(TOL)*norm(input matrix)) - - Computing MAX - Computing MIN -*/ - d__3 = 100., d__4 = pow_dd(&eps, &c_b2944); - d__1 = 10., d__2 = min(d__3,d__4); - tolmul = max(d__1,d__2); - tol = tolmul * eps; - -/* Compute approximate maximum, minimum 
singular values */ - - smax = 0.; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1)); - smax = max(d__2,d__3); -/* L20: */ - } - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); - smax = max(d__2,d__3); -/* L30: */ - } - sminl = 0.; - if (tol >= 0.) { - -/* Relative accuracy desired */ - - sminoa = abs(d__[1]); - if (sminoa == 0.) { - goto L50; - } - mu = sminoa; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] - , abs(d__1)))); - sminoa = min(sminoa,mu); - if (sminoa == 0.) { - goto L50; - } -/* L40: */ - } -L50: - sminoa /= sqrt((doublereal) (*n)); -/* Computing MAX */ - d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl; - thresh = max(d__1,d__2); - } else { - -/* - Absolute accuracy desired - - Computing MAX -*/ - d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl; - thresh = max(d__1,d__2); - } - -/* - Prepare for main iteration loop for the singular values - (MAXIT is the maximum number of passes through the inner - loop permitted before nonconvergence signalled.) -*/ - - maxit = *n * 6 * *n; - iter = 0; - oldll = -1; - oldm = -1; - -/* M points to last element of unconverged part of matrix */ - - m = *n; - -/* Begin main iteration loop */ - -L60: - -/* Check for convergence or exceeding iteration count */ - - if (m <= 1) { - goto L160; - } - if (iter > maxit) { - goto L200; - } - -/* Find diagonal block of matrix to work on */ - - if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) { - d__[m] = 0.; - } - smax = (d__1 = d__[m], abs(d__1)); - smin = smax; - i__1 = m - 1; - for (lll = 1; lll <= i__1; ++lll) { - ll = m - lll; - abss = (d__1 = d__[ll], abs(d__1)); - abse = (d__1 = e[ll], abs(d__1)); - if (tol < 0. 
&& abss <= thresh) { - d__[ll] = 0.; - } - if (abse <= thresh) { - goto L80; - } - smin = min(smin,abss); -/* Computing MAX */ - d__1 = max(smax,abss); - smax = max(d__1,abse); -/* L70: */ - } - ll = 0; - goto L90; -L80: - e[ll] = 0.; - -/* Matrix splits since E(LL) = 0 */ - - if (ll == m - 1) { - -/* Convergence of bottom singular value, return to top of loop */ - - --m; - goto L60; - } -L90: - ++ll; - -/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ - - if (ll == m - 1) { - -/* 2 by 2 block, handle separately */ - - dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, - &sinl, &cosl); - d__[m - 1] = sigmx; - e[m - 1] = 0.; - d__[m] = sigmn; - -/* Compute singular vectors, if desired */ - - if (*ncvt > 0) { - drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & - cosr, &sinr); - } - if (*nru > 0) { - drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & - c__1, &cosl, &sinl); - } - if (*ncc > 0) { - drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & - cosl, &sinl); - } - m += -2; - goto L60; - } - -/* - If working on new submatrix, choose shift direction - (from larger end diagonal element towards smaller) -*/ - - if ((ll > oldm) || (m < oldll)) { - if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) { - -/* Chase bulge from top (big end) to bottom (small end) */ - - idir = 1; - } else { - -/* Chase bulge from bottom (big end) to top (small end) */ - - idir = 2; - } - } - -/* Apply convergence tests */ - - if (idir == 1) { - -/* - Run convergence test in forward direction - First apply standard test to bottom of matrix -*/ - - if (((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs( - d__1))) || (tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= - thresh)) { - e[m - 1] = 0.; - goto L60; - } - - if (tol >= 0.) 
{ - -/* - If relative accuracy desired, - apply convergence criterion forward -*/ - - mu = (d__1 = d__[ll], abs(d__1)); - sminl = mu; - i__1 = m - 1; - for (lll = ll; lll <= i__1; ++lll) { - if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { - e[lll] = 0.; - goto L60; - } - sminlo = sminl; - mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ - lll], abs(d__1)))); - sminl = min(sminl,mu); -/* L100: */ - } - } - - } else { - -/* - Run convergence test in backward direction - First apply standard test to top of matrix -*/ - - if (((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs( - d__1))) || (tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh)) - { - e[ll] = 0.; - goto L60; - } - - if (tol >= 0.) { - -/* - If relative accuracy desired, - apply convergence criterion backward -*/ - - mu = (d__1 = d__[m], abs(d__1)); - sminl = mu; - i__1 = ll; - for (lll = m - 1; lll >= i__1; --lll) { - if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { - e[lll] = 0.; - goto L60; - } - sminlo = sminl; - mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] - , abs(d__1)))); - sminl = min(sminl,mu); -/* L110: */ - } - } - } - oldll = ll; - oldm = m; - -/* - Compute shift. First, test if shifting would ruin relative - accuracy, and if so set the shift to zero. - - Computing MAX -*/ - d__1 = eps, d__2 = tol * .01; - if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1,d__2)) { - -/* Use a zero shift to avoid loss of relative accuracy */ - - shift = 0.; - } else { - -/* Compute the shift from 2-by-2 block at end of matrix */ - - if (idir == 1) { - sll = (d__1 = d__[ll], abs(d__1)); - dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); - } else { - sll = (d__1 = d__[m], abs(d__1)); - dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); - } - -/* Test if shift negligible, and if so set to zero */ - - if (sll > 0.) 
{ -/* Computing 2nd power */ - d__1 = shift / sll; - if (d__1 * d__1 < eps) { - shift = 0.; - } - } - } - -/* Increment iteration count */ - - iter = iter + m - ll; - -/* If SHIFT = 0, do simplified QR iteration */ - - if (shift == 0.) { - if (idir == 1) { - -/* - Chase bulge from top to bottom - Save cosines and sines for later singular vector updates -*/ - - cs = 1.; - oldcs = 1.; - i__1 = m - 1; - for (i__ = ll; i__ <= i__1; ++i__) { - d__1 = d__[i__] * cs; - dlartg_(&d__1, &e[i__], &cs, &sn, &r__); - if (i__ > ll) { - e[i__ - 1] = oldsn * r__; - } - d__1 = oldcs * r__; - d__2 = d__[i__ + 1] * sn; - dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); - work[i__ - ll + 1] = cs; - work[i__ - ll + 1 + nm1] = sn; - work[i__ - ll + 1 + nm12] = oldcs; - work[i__ - ll + 1 + nm13] = oldsn; -/* L120: */ - } - h__ = d__[m] * cs; - d__[m] = h__ * oldcs; - e[m - 1] = h__ * oldsn; - -/* Update singular vectors */ - - if (*ncvt > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ - ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 - + 1], &u[ll * u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 - + 1], &c__[ll + c_dim1], ldc); - } - -/* Test convergence */ - - if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { - e[m - 1] = 0.; - } - - } else { - -/* - Chase bulge from bottom to top - Save cosines and sines for later singular vector updates -*/ - - cs = 1.; - oldcs = 1.; - i__1 = ll + 1; - for (i__ = m; i__ >= i__1; --i__) { - d__1 = d__[i__] * cs; - dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__); - if (i__ < m) { - e[i__] = oldsn * r__; - } - d__1 = oldcs * r__; - d__2 = d__[i__ - 1] * sn; - dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); - work[i__ - ll] = cs; - work[i__ - ll + nm1] = -sn; - work[i__ - ll + nm12] = oldcs; - work[i__ - ll + nm13] = -oldsn; -/* L130: */ - } - h__ = d__[ll] * 
cs; - d__[ll] = h__ * oldcs; - e[ll] = h__ * oldsn; - -/* Update singular vectors */ - - if (*ncvt > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ - nm13 + 1], &vt[ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * - u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ - ll + c_dim1], ldc); - } - -/* Test convergence */ - - if ((d__1 = e[ll], abs(d__1)) <= thresh) { - e[ll] = 0.; - } - } - } else { - -/* Use nonzero shift */ - - if (idir == 1) { - -/* - Chase bulge from top to bottom - Save cosines and sines for later singular vector updates -*/ - - f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b2865, & - d__[ll]) + shift / d__[ll]); - g = e[ll]; - i__1 = m - 1; - for (i__ = ll; i__ <= i__1; ++i__) { - dlartg_(&f, &g, &cosr, &sinr, &r__); - if (i__ > ll) { - e[i__ - 1] = r__; - } - f = cosr * d__[i__] + sinr * e[i__]; - e[i__] = cosr * e[i__] - sinr * d__[i__]; - g = sinr * d__[i__ + 1]; - d__[i__ + 1] = cosr * d__[i__ + 1]; - dlartg_(&f, &g, &cosl, &sinl, &r__); - d__[i__] = r__; - f = cosl * e[i__] + sinl * d__[i__ + 1]; - d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; - if (i__ < m - 1) { - g = sinl * e[i__ + 1]; - e[i__ + 1] = cosl * e[i__ + 1]; - } - work[i__ - ll + 1] = cosr; - work[i__ - ll + 1 + nm1] = sinr; - work[i__ - ll + 1 + nm12] = cosl; - work[i__ - ll + 1 + nm13] = sinl; -/* L140: */ - } - e[m - 1] = f; - -/* Update singular vectors */ - - if (*ncvt > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ - ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 - + 1], &u[ll * u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 - + 1], &c__[ll + c_dim1], ldc); - } - 
-/* Test convergence */ - - if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { - e[m - 1] = 0.; - } - - } else { - -/* - Chase bulge from bottom to top - Save cosines and sines for later singular vector updates -*/ - - f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b2865, &d__[ - m]) + shift / d__[m]); - g = e[m - 1]; - i__1 = ll + 1; - for (i__ = m; i__ >= i__1; --i__) { - dlartg_(&f, &g, &cosr, &sinr, &r__); - if (i__ < m) { - e[i__] = r__; - } - f = cosr * d__[i__] + sinr * e[i__ - 1]; - e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; - g = sinr * d__[i__ - 1]; - d__[i__ - 1] = cosr * d__[i__ - 1]; - dlartg_(&f, &g, &cosl, &sinl, &r__); - d__[i__] = r__; - f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; - d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; - if (i__ > ll + 1) { - g = sinl * e[i__ - 2]; - e[i__ - 2] = cosl * e[i__ - 2]; - } - work[i__ - ll] = cosr; - work[i__ - ll + nm1] = -sinr; - work[i__ - ll + nm12] = cosl; - work[i__ - ll + nm13] = -sinl; -/* L150: */ - } - e[ll] = f; - -/* Test convergence */ - - if ((d__1 = e[ll], abs(d__1)) <= thresh) { - e[ll] = 0.; - } - -/* Update singular vectors if desired */ - - if (*ncvt > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ - nm13 + 1], &vt[ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * - u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ - ll + c_dim1], ldc); - } - } - } - -/* QR iteration finished, go back and check convergence */ - - goto L60; - -/* All singular values converged, so make them positive */ - -L160: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (d__[i__] < 0.) 
{ - d__[i__] = -d__[i__]; - -/* Change sign of singular vectors, if desired */ - - if (*ncvt > 0) { - dscal_(ncvt, &c_b3001, &vt[i__ + vt_dim1], ldvt); - } - } -/* L170: */ - } - -/* - Sort the singular values into decreasing order (insertion sort on - singular values, but only one transposition per singular vector) -*/ - - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Scan for smallest D(I) */ - - isub = 1; - smin = d__[1]; - i__2 = *n + 1 - i__; - for (j = 2; j <= i__2; ++j) { - if (d__[j] <= smin) { - isub = j; - smin = d__[j]; - } -/* L180: */ - } - if (isub != *n + 1 - i__) { - -/* Swap singular values and vectors */ - - d__[isub] = d__[*n + 1 - i__]; - d__[*n + 1 - i__] = smin; - if (*ncvt > 0) { - dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + - vt_dim1], ldvt); - } - if (*nru > 0) { - dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * - u_dim1 + 1], &c__1); - } - if (*ncc > 0) { - dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + - c_dim1], ldc); - } - } -/* L190: */ - } - goto L220; - -/* Maximum number of iterations exceeded, failure to converge */ - -L200: - *info = 0; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.) 
{ - ++(*info); - } -/* L210: */ - } -L220: - return 0; - -/* End of DBDSQR */ - -} /* dbdsqr_ */ - -/* Subroutine */ int dgebak_(char *job, char *side, integer *n, integer *ilo, - integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * - ldv, integer *info) -{ - /* System generated locals */ - integer v_dim1, v_offset, i__1; - - /* Local variables */ - static integer i__, k; - static doublereal s; - static integer ii; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *); - static logical leftv; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical rightv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DGEBAK forms the right or left eigenvectors of a real general matrix - by backward transformation on the computed eigenvectors of the - balanced matrix output by DGEBAL. - - Arguments - ========= - - JOB (input) CHARACTER*1 - Specifies the type of backward transformation required: - = 'N', do nothing, return immediately; - = 'P', do backward transformation for permutation only; - = 'S', do backward transformation for scaling only; - = 'B', do backward transformations for both permutation and - scaling. - JOB must be the same as the argument JOB supplied to DGEBAL. - - SIDE (input) CHARACTER*1 - = 'R': V contains right eigenvectors; - = 'L': V contains left eigenvectors. - - N (input) INTEGER - The number of rows of the matrix V. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - The integers ILO and IHI determined by DGEBAL. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. 
- - SCALE (input) DOUBLE PRECISION array, dimension (N) - Details of the permutation and scaling factors, as returned - by DGEBAL. - - M (input) INTEGER - The number of columns of the matrix V. M >= 0. - - V (input/output) DOUBLE PRECISION array, dimension (LDV,M) - On entry, the matrix of right or left eigenvectors to be - transformed, as returned by DHSEIN or DTREVC. - On exit, V is overwritten by the transformed eigenvectors. - - LDV (input) INTEGER - The leading dimension of the array V. LDV >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - ===================================================================== - - - Decode and Test the input parameters -*/ - - /* Parameter adjustments */ - --scale; - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - - /* Function Body */ - rightv = lsame_(side, "R"); - leftv = lsame_(side, "L"); - - *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! lsame_(job, "B")) { - *info = -1; - } else if (! rightv && ! leftv) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { - *info = -4; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { - *info = -5; - } else if (*m < 0) { - *info = -7; - } else if (*ldv < max(1,*n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEBAK", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*m == 0) { - return 0; - } - if (lsame_(job, "N")) { - return 0; - } - - if (*ilo == *ihi) { - goto L30; - } - -/* Backward balance */ - - if ((lsame_(job, "S")) || (lsame_(job, "B"))) { - - if (rightv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) { - s = scale[i__]; - dscal_(m, &s, &v[i__ + v_dim1], ldv); -/* L10: */ - } - } - - if (leftv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) { - s = 1. 
/ scale[i__]; - dscal_(m, &s, &v[i__ + v_dim1], ldv); -/* L20: */ - } - } - - } - -/* - Backward permutation - - For I = ILO-1 step -1 until 1, - IHI+1 step 1 until N do -- -*/ - -L30: - if ((lsame_(job, "P")) || (lsame_(job, "B"))) { - if (rightv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L40; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = (integer) scale[i__]; - if (k == i__) { - goto L40; - } - dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); -L40: - ; - } - } - - if (leftv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L50; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = (integer) scale[i__]; - if (k == i__) { - goto L50; - } - dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); -L50: - ; - } - } - } - - return 0; - -/* End of DGEBAK */ - -} /* dgebak_ */ - -/* Subroutine */ int dgebal_(char *job, integer *n, doublereal *a, integer * - lda, integer *ilo, integer *ihi, doublereal *scale, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal d__1, d__2; - - /* Local variables */ - static doublereal c__, f, g; - static integer i__, j, k, l, m; - static doublereal r__, s, ca, ra; - static integer ica, ira, iexc; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *); - static doublereal sfmin1, sfmin2, sfmax1, sfmax2; - - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical noconv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DGEBAL balances a general real matrix A. This involves, first, - permuting A by a similarity transformation to isolate eigenvalues - in the first 1 to ILO-1 and last IHI+1 to N elements on the - diagonal; and second, applying a diagonal similarity transformation - to rows and columns ILO to IHI to make the rows and columns as - close in norm as possible. Both steps are optional. - - Balancing may reduce the 1-norm of the matrix, and improve the - accuracy of the computed eigenvalues and/or eigenvectors. - - Arguments - ========= - - JOB (input) CHARACTER*1 - Specifies the operations to be performed on A: - = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 - for i = 1,...,N; - = 'P': permute only; - = 'S': scale only; - = 'B': both permute and scale. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the input matrix A. - On exit, A is overwritten by the balanced matrix. - If JOB = 'N', A is not referenced. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - ILO (output) INTEGER - IHI (output) INTEGER - ILO and IHI are set to integers such that on exit - A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. - If JOB = 'N' or 'S', ILO = 1 and IHI = N. - - SCALE (output) DOUBLE PRECISION array, dimension (N) - Details of the permutations and scaling factors applied to - A. If P(j) is the index of the row and column interchanged - with row and column j and D(j) is the scaling factor - applied to row and column j, then - SCALE(j) = P(j) for j = 1,...,ILO-1 - = D(j) for j = ILO,...,IHI - = P(j) for j = IHI+1,...,N. - The order in which the interchanges are made is N to IHI+1, - then 1 to ILO-1. - - INFO (output) INTEGER - = 0: successful exit. 
- < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The permutations consist of row and column interchanges which put - the matrix in the form - - ( T1 X Y ) - P A P = ( 0 B Z ) - ( 0 0 T2 ) - - where T1 and T2 are upper triangular matrices whose eigenvalues lie - along the diagonal. The column indices ILO and IHI mark the starting - and ending columns of the submatrix B. Balancing consists of applying - a diagonal similarity transformation inv(D) * B * D to make the - 1-norms of each row of B and its corresponding column nearly equal. - The output matrix is - - ( T1 X*D Y ) - ( 0 inv(D)*B*D inv(D)*Z ). - ( 0 0 T2 ) - - Information about the permutations P and the diagonal matrix D is - returned in the vector SCALE. - - This subroutine is based on the EISPACK routine BALANC. - - Modified by Tzu-Yi Chen, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --scale; - - /* Function Body */ - *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! lsame_(job, "B")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEBAL", &i__1); - return 0; - } - - k = 1; - l = *n; - - if (*n == 0) { - goto L210; - } - - if (lsame_(job, "N")) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - scale[i__] = 1.; -/* L10: */ - } - goto L210; - } - - if (lsame_(job, "S")) { - goto L120; - } - -/* Permutation to isolate eigenvalues if possible */ - - goto L50; - -/* Row and column exchange. 
*/ - -L20: - scale[m] = (doublereal) j; - if (j == m) { - goto L30; - } - - dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); - i__1 = *n - k + 1; - dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); - -L30: - switch (iexc) { - case 1: goto L40; - case 2: goto L80; - } - -/* Search for rows isolating an eigenvalue and push them down. */ - -L40: - if (l == 1) { - goto L210; - } - --l; - -L50: - for (j = l; j >= 1; --j) { - - i__1 = l; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ == j) { - goto L60; - } - if (a[j + i__ * a_dim1] != 0.) { - goto L70; - } -L60: - ; - } - - m = l; - iexc = 1; - goto L20; -L70: - ; - } - - goto L90; - -/* Search for columns isolating an eigenvalue and push them left. */ - -L80: - ++k; - -L90: - i__1 = l; - for (j = k; j <= i__1; ++j) { - - i__2 = l; - for (i__ = k; i__ <= i__2; ++i__) { - if (i__ == j) { - goto L100; - } - if (a[i__ + j * a_dim1] != 0.) { - goto L110; - } -L100: - ; - } - - m = k; - iexc = 2; - goto L20; -L110: - ; - } - -L120: - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - scale[i__] = 1.; -/* L130: */ - } - - if (lsame_(job, "P")) { - goto L210; - } - -/* - Balance the submatrix in rows K to L. - - Iterative loop for norm reduction -*/ - - sfmin1 = SAFEMINIMUM / PRECISION; - sfmax1 = 1. / sfmin1; - sfmin2 = sfmin1 * 8.; - sfmax2 = 1. / sfmin2; -L140: - noconv = FALSE_; - - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - c__ = 0.; - r__ = 0.; - - i__2 = l; - for (j = k; j <= i__2; ++j) { - if (j == i__) { - goto L150; - } - c__ += (d__1 = a[j + i__ * a_dim1], abs(d__1)); - r__ += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -L150: - ; - } - ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1); - ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); - i__2 = *n - k + 1; - ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda); - ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); - -/* Guard against zero C or R due to underflow. */ - - if ((c__ == 0.) 
|| (r__ == 0.)) { - goto L200; - } - g = r__ / 8.; - f = 1.; - s = c__ + r__; -L160: -/* Computing MAX */ - d__1 = max(f,c__); -/* Computing MIN */ - d__2 = min(r__,g); - if (((c__ >= g) || (max(d__1,ca) >= sfmax2)) || (min(d__2,ra) <= - sfmin2)) { - goto L170; - } - f *= 8.; - c__ *= 8.; - ca *= 8.; - r__ /= 8.; - g /= 8.; - ra /= 8.; - goto L160; - -L170: - g = c__ / 8.; -L180: -/* Computing MIN */ - d__1 = min(f,c__), d__1 = min(d__1,g); - if (((g < r__) || (max(r__,ra) >= sfmax2)) || (min(d__1,ca) <= sfmin2) - ) { - goto L190; - } - f /= 8.; - c__ /= 8.; - g /= 8.; - ca /= 8.; - r__ *= 8.; - ra *= 8.; - goto L180; - -/* Now balance. */ - -L190: - if (c__ + r__ >= s * .95) { - goto L200; - } - if (f < 1. && scale[i__] < 1.) { - if (f * scale[i__] <= sfmin1) { - goto L200; - } - } - if (f > 1. && scale[i__] > 1.) { - if (scale[i__] >= sfmax1 / f) { - goto L200; - } - } - g = 1. / f; - scale[i__] *= f; - noconv = TRUE_; - - i__2 = *n - k + 1; - dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); - dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); - -L200: - ; - } - - if (noconv) { - goto L140; - } - -L210: - *ilo = k; - *ihi = l; - - return 0; - -/* End of DGEBAL */ - -} /* dgebal_ */ - -/* Subroutine */ int dgebd2_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * - taup, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DGEBD2 reduces a real general m by n matrix A to upper or lower - bidiagonal form B by an orthogonal transformation: Q' * A * P = B. - - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. M >= 0. - - N (input) INTEGER - The number of columns in the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. - On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the orthogonal matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the orthogonal matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) DOUBLE PRECISION array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. 
- - TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix Q. See Further Details. - - TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix P. See Further Details. - - WORK (workspace) DOUBLE PRECISION array, dimension (max(M,N)) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - If m >= n, - - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); - u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, - - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); - u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). 
- - The contents of A on exit are illustrated by the following examples: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) - - where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info < 0) { - i__1 = -(*info); - xerbla_("DGEBD2", &i__1); - return 0; - } - - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ - - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * - a_dim1], &c__1, &tauq[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - -/* Apply H(i) to A(i:m,i+1:n) from the left */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__; - dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = d__[i__]; - - if (i__ < *n) { - -/* - Generate elementary reflector G(i) to annihilate - A(i,i+2:n) -*/ - - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( - i__3,*n) * a_dim1], lda, &taup[i__]); - e[i__] = a[i__ + (i__ + 1) * a_dim1]; - a[i__ + (i__ + 1) * a_dim1] = 1.; - -/* 
Apply G(i) to A(i+1:m,i+1:n) from the right */ - - i__2 = *m - i__; - i__3 = *n - i__; - dlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], - lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &work[1]); - a[i__ + (i__ + 1) * a_dim1] = e[i__]; - } else { - taup[i__] = 0.; - } -/* L10: */ - } - } else { - -/* Reduce to lower bidiagonal form */ - - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * - a_dim1], lda, &taup[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - -/* Apply G(i) to A(i+1:m,i:n) from the right */ - - i__2 = *m - i__; - i__3 = *n - i__ + 1; -/* Computing MIN */ - i__4 = i__ + 1; - dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ - i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = d__[i__]; - - if (i__ < *m) { - -/* - Generate elementary reflector H(i) to annihilate - A(i+2:m,i) -*/ - - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + - i__ * a_dim1], &c__1, &tauq[i__]); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.; - -/* Apply H(i) to A(i+1:m,i+1:n) from the left */ - - i__2 = *m - i__; - i__3 = *n - i__; - dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & - c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &work[1]); - a[i__ + 1 + i__ * a_dim1] = e[i__]; - } else { - tauq[i__] = 0.; - } -/* L20: */ - } - } - return 0; - -/* End of DGEBD2 */ - -} /* dgebd2_ */ - -/* Subroutine */ int dgebrd_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * - taup, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* 
Local variables */ - static integer i__, j, nb, nx; - static doublereal ws; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer nbmin, iinfo, minmn; - extern /* Subroutine */ int dgebd2_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *), dlabrd_(integer *, integer *, integer * - , doublereal *, integer *, doublereal *, doublereal *, doublereal - *, doublereal *, doublereal *, integer *, doublereal *, integer *) - , xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwrkx, ldwrky, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DGEBRD reduces a general real M-by-N matrix A to upper or lower - bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. - - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. M >= 0. - - N (input) INTEGER - The number of columns in the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N general matrix to be reduced. 
- On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the orthogonal matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the orthogonal matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) DOUBLE PRECISION array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - - TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix Q. See Further Details. - - TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix P. See Further Details. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,M,N). - For optimum performance LWORK >= (M+N)*NB, where NB - is the optimal blocksize. 
- - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - If m >= n, - - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); - u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, - - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); - u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - The contents of A on exit are illustrated by the following examples: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) - - where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). 
- - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - --work; - - /* Function Body */ - *info = 0; -/* Computing MAX */ - i__1 = 1, i__2 = ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = max(i__1,i__2); - lwkopt = (*m + *n) * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = max(1,*m); - if (*lwork < max(i__1,*n) && ! lquery) { - *info = -10; - } - } - if (*info < 0) { - i__1 = -(*info); - xerbla_("DGEBRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - minmn = min(*m,*n); - if (minmn == 0) { - work[1] = 1.; - return 0; - } - - ws = (doublereal) max(*m,*n); - ldwrkx = *m; - ldwrky = *n; - - if (nb > 1 && nb < minmn) { - -/* - Set the crossover point NX. - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - -/* Determine when to switch from blocked to unblocked code. */ - - if (nx < minmn) { - ws = (doublereal) ((*m + *n) * nb); - if ((doublereal) (*lwork) < ws) { - -/* - Not enough work space for the optimal NB, consider using - a smaller block size. -*/ - - nbmin = ilaenv_(&c__2, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - if (*lwork >= (*m + *n) * nbmin) { - nb = *lwork / (*m + *n); - } else { - nb = 1; - nx = minmn; - } - } - } - } else { - nx = minmn; - } - - i__1 = minmn - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { - -/* - Reduce rows and columns i:i+nb-1 to bidiagonal form and return - the matrices X and Y which are needed to update the unreduced - part of the matrix -*/ - - i__3 = *m - i__ + 1; - i__4 = *n - i__ + 1; - dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ - i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx - * nb + 1], &ldwrky); - -/* - Update the trailing submatrix A(i+nb:m,i+nb:n), using an update - of the form A := A - V*Y' - X*U' -*/ - - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - dgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b3001, &a[ - i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & - ldwrky, &c_b2865, &a[i__ + nb + (i__ + nb) * a_dim1], lda); - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - dgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b3001, & - work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & - c_b2865, &a[i__ + nb + (i__ + nb) * a_dim1], lda); - -/* Copy diagonal and off-diagonal elements of B back into A */ - - if (*m >= *n) { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j + j * a_dim1] = d__[j]; - a[j + (j + 1) * a_dim1] = e[j]; -/* L10: */ - } - } else { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j + j * a_dim1] = d__[j]; - a[j + 1 + j * a_dim1] = e[j]; -/* L20: */ - } - } -/* L30: */ - } - -/* Use unblocked code to reduce the remainder of the matrix */ - - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & - tauq[i__], &taup[i__], &work[1], &iinfo); - work[1] = ws; - return 0; - -/* End of DGEBRD */ - -} /* dgebrd_ */ - -/* Subroutine */ int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * - a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, - integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, - integer *lwork, integer *info) -{ - /* System generated locals */ - 
integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, k; - static doublereal r__, cs, sn; - static integer ihi; - static doublereal scl; - static integer ilo; - static doublereal dum[1], eps; - static integer ibal; - static char side[1]; - static integer maxb; - static doublereal anrm; - static integer ierr, itau; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer iwrk, nout; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern doublereal dlapy2_(doublereal *, doublereal *); - extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_( - char *, char *, integer *, integer *, integer *, doublereal *, - integer *, doublereal *, integer *, integer *), - dgebal_(char *, integer *, doublereal *, integer *, integer *, - integer *, doublereal *, integer *); - static logical scalea; - - static doublereal cscale; - extern doublereal dlange_(char *, integer *, integer *, doublereal *, - integer *, doublereal *); - extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *), dlascl_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, integer *, doublereal *, integer *, - integer *); - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *), - dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *), xerbla_(char *, integer *); - static logical select[1]; - extern integer ilaenv_(integer *, char *, char *, 
integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static doublereal bignum; - extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *), dhseqr_(char *, char *, integer *, integer *, integer - *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, integer *, doublereal *, integer *, integer *), dtrevc_(char *, char *, logical *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, doublereal *, integer *); - static integer minwrk, maxwrk; - static logical wantvl; - static doublereal smlnum; - static integer hswork; - static logical lquery, wantvr; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 8, 1999 - - - Purpose - ======= - - DGEEV computes for an N-by-N real nonsymmetric matrix A, the - eigenvalues and, optionally, the left and/or right eigenvectors. - - The right eigenvector v(j) of A satisfies - A * v(j) = lambda(j) * v(j) - where lambda(j) is its eigenvalue. - The left eigenvector u(j) of A satisfies - u(j)**H * A = lambda(j) * u(j)**H - where u(j)**H denotes the conjugate transpose of u(j). - - The computed eigenvectors are normalized to have Euclidean norm - equal to 1 and largest component real. - - Arguments - ========= - - JOBVL (input) CHARACTER*1 - = 'N': left eigenvectors of A are not computed; - = 'V': left eigenvectors of A are computed. - - JOBVR (input) CHARACTER*1 - = 'N': right eigenvectors of A are not computed; - = 'V': right eigenvectors of A are computed. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the N-by-N matrix A. - On exit, A has been overwritten. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). 
- - WR (output) DOUBLE PRECISION array, dimension (N) - WI (output) DOUBLE PRECISION array, dimension (N) - WR and WI contain the real and imaginary parts, - respectively, of the computed eigenvalues. Complex - conjugate pairs of eigenvalues appear consecutively - with the eigenvalue having the positive imaginary part - first. - - VL (output) DOUBLE PRECISION array, dimension (LDVL,N) - If JOBVL = 'V', the left eigenvectors u(j) are stored one - after another in the columns of VL, in the same order - as their eigenvalues. - If JOBVL = 'N', VL is not referenced. - If the j-th eigenvalue is real, then u(j) = VL(:,j), - the j-th column of VL. - If the j-th and (j+1)-st eigenvalues form a complex - conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and - u(j+1) = VL(:,j) - i*VL(:,j+1). - - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= 1; if - JOBVL = 'V', LDVL >= N. - - VR (output) DOUBLE PRECISION array, dimension (LDVR,N) - If JOBVR = 'V', the right eigenvectors v(j) are stored one - after another in the columns of VR, in the same order - as their eigenvalues. - If JOBVR = 'N', VR is not referenced. - If the j-th eigenvalue is real, then v(j) = VR(:,j), - the j-th column of VR. - If the j-th and (j+1)-st eigenvalues form a complex - conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and - v(j+1) = VR(:,j) - i*VR(:,j+1). - - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= 1; if - JOBVR = 'V', LDVR >= N. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,3*N), and - if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good - performance, LWORK must generally be larger. 
- - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = i, the QR algorithm failed to compute all the - eigenvalues, and no eigenvectors have been computed; - elements i+1:N of WR and WI contain eigenvalues which - have converged. - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --wr; - --wi; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - - /* Function Body */ - *info = 0; - lquery = *lwork == -1; - wantvl = lsame_(jobvl, "V"); - wantvr = lsame_(jobvr, "V"); - if (! wantvl && ! lsame_(jobvl, "N")) { - *info = -1; - } else if (! wantvr && ! lsame_(jobvr, "N")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if ((*ldvl < 1) || (wantvl && *ldvl < *n)) { - *info = -9; - } else if ((*ldvr < 1) || (wantvr && *ldvr < *n)) { - *info = -11; - } - -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - NB refers to the optimal block size for the immediately - following subroutine, as returned by ILAENV. - HSWORK refers to the workspace preferred by DHSEQR, as - calculated below. HSWORK is computed assuming ILO=1 and IHI=N, - the worst case.) 
-*/ - - minwrk = 1; - if (*info == 0 && ((*lwork >= 1) || (lquery))) { - maxwrk = ((*n) << (1)) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, - n, &c__0, (ftnlen)6, (ftnlen)1); - if (! wantvl && ! wantvr) { -/* Computing MAX */ - i__1 = 1, i__2 = *n * 3; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "DHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "EN", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = (*n) << (1); - hswork = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + - hswork; - maxwrk = max(i__1,i__2); - } else { -/* Computing MAX */ - i__1 = 1, i__2 = (*n) << (2); - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + (*n - 1) * ilaenv_(&c__1, - "DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "DHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "SV", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = (*n) << (1); - hswork = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + - hswork; - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n) << (2); - maxwrk = max(i__1,i__2); - } - work[1] = (doublereal) maxwrk; - } - if (*lwork < minwrk && ! 
lquery) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEEV ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Get machine constants */ - - eps = PRECISION; - smlnum = SAFEMINIMUM; - bignum = 1. / smlnum; - dlabad_(&smlnum, &bignum); - smlnum = sqrt(smlnum) / eps; - bignum = 1. / smlnum; - -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ - - anrm = dlange_("M", n, n, &a[a_offset], lda, dum); - scalea = FALSE_; - if (anrm > 0. && anrm < smlnum) { - scalea = TRUE_; - cscale = smlnum; - } else if (anrm > bignum) { - scalea = TRUE_; - cscale = bignum; - } - if (scalea) { - dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & - ierr); - } - -/* - Balance the matrix - (Workspace: need N) -*/ - - ibal = 1; - dgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); - -/* - Reduce to upper Hessenberg form - (Workspace: need 3*N, prefer 2*N+N*NB) -*/ - - itau = ibal + *n; - iwrk = itau + *n; - i__1 = *lwork - iwrk + 1; - dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, - &ierr); - - if (wantvl) { - -/* - Want left eigenvectors - Copy Householder vectors to VL -*/ - - *(unsigned char *)side = 'L'; - dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) - ; - -/* - Generate orthogonal matrix in VL - (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) -*/ - - i__1 = *lwork - iwrk + 1; - dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], - &i__1, &ierr); - -/* - Perform QR iteration, accumulating Schur vectors in VL - (Workspace: need N+1, prefer N+HSWORK (see comments) ) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & - vl[vl_offset], ldvl, &work[iwrk], &i__1, info); - - if (wantvr) { - -/* - Want left and right eigenvectors - Copy Schur vectors to VR -*/ - - *(unsigned char *)side = 'B'; - dlacpy_("F", n, n, 
&vl[vl_offset], ldvl, &vr[vr_offset], ldvr); - } - - } else if (wantvr) { - -/* - Want right eigenvectors - Copy Householder vectors to VR -*/ - - *(unsigned char *)side = 'R'; - dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) - ; - -/* - Generate orthogonal matrix in VR - (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) -*/ - - i__1 = *lwork - iwrk + 1; - dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], - &i__1, &ierr); - -/* - Perform QR iteration, accumulating Schur vectors in VR - (Workspace: need N+1, prefer N+HSWORK (see comments) ) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & - vr[vr_offset], ldvr, &work[iwrk], &i__1, info); - - } else { - -/* - Compute eigenvalues only - (Workspace: need N+1, prefer N+HSWORK (see comments) ) -*/ - - iwrk = itau; - i__1 = *lwork - iwrk + 1; - dhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & - vr[vr_offset], ldvr, &work[iwrk], &i__1, info); - } - -/* If INFO > 0 from DHSEQR, then quit */ - - if (*info > 0) { - goto L50; - } - - if ((wantvl) || (wantvr)) { - -/* - Compute left and/or right eigenvectors - (Workspace: need 4*N) -*/ - - dtrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, - &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); - } - - if (wantvl) { - -/* - Undo balancing of left eigenvectors - (Workspace: need N) -*/ - - dgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, - &ierr); - -/* Normalize left eigenvectors and make largest component real */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (wi[i__] == 0.) { - scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); - dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); - } else if (wi[i__] > 0.) { - d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); - d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); - scl = 1. 
/ dlapy2_(&d__1, &d__2); - dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); - dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { -/* Computing 2nd power */ - d__1 = vl[k + i__ * vl_dim1]; -/* Computing 2nd power */ - d__2 = vl[k + (i__ + 1) * vl_dim1]; - work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; -/* L10: */ - } - k = idamax_(n, &work[iwrk], &c__1); - dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], - &cs, &sn, &r__); - drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * - vl_dim1 + 1], &c__1, &cs, &sn); - vl[k + (i__ + 1) * vl_dim1] = 0.; - } -/* L20: */ - } - } - - if (wantvr) { - -/* - Undo balancing of right eigenvectors - (Workspace: need N) -*/ - - dgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, - &ierr); - -/* Normalize right eigenvectors and make largest component real */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (wi[i__] == 0.) { - scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); - dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); - } else if (wi[i__] > 0.) { - d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); - d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); - scl = 1. 
/ dlapy2_(&d__1, &d__2); - dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); - dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { -/* Computing 2nd power */ - d__1 = vr[k + i__ * vr_dim1]; -/* Computing 2nd power */ - d__2 = vr[k + (i__ + 1) * vr_dim1]; - work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; -/* L30: */ - } - k = idamax_(n, &work[iwrk], &c__1); - dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], - &cs, &sn, &r__); - drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * - vr_dim1 + 1], &c__1, &cs, &sn); - vr[k + (i__ + 1) * vr_dim1] = 0.; - } -/* L40: */ - } - } - -/* Undo scaling if necessary */ - -L50: - if (scalea) { - i__1 = *n - *info; -/* Computing MAX */ - i__3 = *n - *info; - i__2 = max(i__3,1); - dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + - 1], &i__2, &ierr); - i__1 = *n - *info; -/* Computing MAX */ - i__3 = *n - *info; - i__2 = max(i__3,1); - dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + - 1], &i__2, &ierr); - if (*info > 0) { - i__1 = ilo - 1; - dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], - n, &ierr); - i__1 = ilo - 1; - dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], - n, &ierr); - } - } - - work[1] = (doublereal) maxwrk; - return 0; - -/* End of DGEEV */ - -} /* dgeev_ */ - -/* Subroutine */ int dgehd2_(integer *n, integer *ilo, integer *ihi, - doublereal *a, integer *lda, doublereal *tau, doublereal *work, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__; - static doublereal aii; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DGEHD2 reduces a real general matrix A to upper Hessenberg form H by - an orthogonal similarity transformation: Q' * A * Q = H . - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to DGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. - 1 <= ILO <= IHI <= max(1,N). - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the n by n general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the orthogonal matrix Q as a product of elementary - reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) DOUBLE PRECISION array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) DOUBLE PRECISION array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). 
- - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: - - on entry, on exit, - - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -1; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { - *info = -2; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEHD2", &i__1); - return 0; - } - - i__1 = *ihi - 1; - for (i__ = *ilo; i__ <= i__1; ++i__) { - -/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ - - i__2 = *ihi - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * - a_dim1], &c__1, &tau[i__]); - aii = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.; - -/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ - - i__2 = *ihi - i__; - dlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); - -/* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ - - i__2 = *ihi - i__; - i__3 = *n - i__; - dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); - - a[i__ + 1 + i__ * a_dim1] = aii; -/* L10: */ - } - - return 0; - -/* End of DGEHD2 */ - -} /* dgehd2_ */ - -/* Subroutine */ 
int dgehrd_(integer *n, integer *ilo, integer *ihi, - doublereal *a, integer *lda, doublereal *tau, doublereal *work, - integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__; - static doublereal t[4160] /* was [65][64] */; - static integer ib; - static doublereal ei; - static integer nb, nh, nx, iws; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer nbmin, iinfo; - extern /* Subroutine */ int dgehd2_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), - dlarfb_(char *, char *, char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *), dlahrd_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DGEHRD reduces a real general matrix A to upper Hessenberg form H by - an orthogonal similarity transformation: Q' * A * Q = H . - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to DGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. 
- 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the N-by-N general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the orthogonal matrix Q as a product of elementary - reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) DOUBLE PRECISION array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to - zero. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). 
- - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: - - on entry, on exit, - - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; -/* Computing MIN */ - i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = min(i__1,i__2); - lwkopt = *n * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*n < 0) { - *info = -1; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { - *info = -2; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEHRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - tau[i__] = 0.; -/* L10: */ - } - i__1 = *n - 1; - for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { - tau[i__] = 0.; -/* L20: */ - } - -/* Quick return if possible */ - - nh = *ihi - *ilo + 1; - if (nh <= 1) { - work[1] = 1.; - return 0; - } - -/* - Determine the block size. 
- - Computing MIN -*/ - i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = min(i__1,i__2); - nbmin = 2; - iws = 1; - if (nb > 1 && nb < nh) { - -/* - Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < nh) { - -/* Determine if workspace is large enough for blocked code. */ - - iws = *n * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: determine the - minimum value of NB, and reduce NB or force use of - unblocked code. - - Computing MAX -*/ - i__1 = 2, i__2 = ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - if (*lwork >= *n * nbmin) { - nb = *lwork / *n; - } else { - nb = 1; - } - } - } - } - ldwork = *n; - - if ((nb < nbmin) || (nb >= nh)) { - -/* Use unblocked code below */ - - i__ = *ilo; - - } else { - -/* Use blocked code */ - - i__1 = *ihi - 1 - nx; - i__2 = nb; - for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *ihi - i__; - ib = min(i__3,i__4); - -/* - Reduce columns i:i+ib-1 to Hessenberg form, returning the - matrices V and T of the block reflector H = I - V*T*V' - which performs the reduction, and also the matrix Y = A*V*T -*/ - - dlahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & - c__65, &work[1], &ldwork); - -/* - Apply the block reflector H to A(1:ihi,i+ib:ihi) from the - right, computing A := A - Y * V'. V(i+ib,ib-1) must be set - to 1. 
-*/ - - ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; - a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.; - i__3 = *ihi - i__ - ib + 1; - dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b3001, & - work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & - c_b2865, &a[(i__ + ib) * a_dim1 + 1], lda); - a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; - -/* - Apply the block reflector H to A(i+1:ihi,i+ib:n) from the - left -*/ - - i__3 = *ihi - i__; - i__4 = *n - i__ - ib + 1; - dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & - i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ - i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); -/* L30: */ - } - } - -/* Use unblocked code to reduce the rest of the matrix */ - - dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); - work[1] = (doublereal) iws; - - return 0; - -/* End of DGEHRD */ - -} /* dgehrd_ */ - -/* Subroutine */ int dgelq2_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *tau, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, k; - static doublereal aii; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DGELQ2 computes an LQ factorization of a real m by n matrix A: - A = L * Q. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n matrix A. 
- On exit, the elements on and below the diagonal of the array - contain the m by min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) DOUBLE PRECISION array, dimension (M) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(k) . . . H(2) H(1), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), - and tau in TAU(i). 
- - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGELQ2", &i__1); - return 0; - } - - k = min(*m,*n); - - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1] - , lda, &tau[i__]); - if (i__ < *m) { - -/* Apply H(i) to A(i+1:m,i:n) from the right */ - - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - i__2 = *m - i__; - i__3 = *n - i__ + 1; - dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ - i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = aii; - } -/* L10: */ - } - return 0; - -/* End of DGELQ2 */ - -} /* dgelq2_ */ - -/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, - char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
- integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DGELQF computes an LQ factorization of a real M-by-N matrix A: - A = L * Q. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, the elements on and below the diagonal of the array - contain the m-by-min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,M). - For optimum performance LWORK >= M*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(k) . . . H(2) H(1), where k = min(m,n). 
- - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *m * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*m) && ! lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGELQF", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - k = min(*m,*n); - if (k == 0) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *m; - if (nb > 1 && nb < k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *m; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < k && nx < k) { - -/* Use blocked code initially */ - - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); - -/* - Compute the LQ factorization of the current block - A(i:i+ib-1,i:n) -*/ - - i__3 = *n - i__ + 1; - dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *m) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__3 = *n - i__ + 1; - dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H to A(i+ib:m,i:n) from the right */ - - i__3 = *m - i__ - ib + 1; - i__4 = *n - i__ + 1; - dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, - &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + - 1], &ldwork); - } -/* L10: */ - } - } else { - i__ = 1; - } - -/* Use unblocked code to factor the last or only block. */ - - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); - } - - work[1] = (doublereal) iws; - return 0; - -/* End of DGELQF */ - -} /* dgelqf_ */ - -/* Subroutine */ int dgelsd_(integer *m, integer *n, integer *nrhs, - doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * - s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, - integer *iwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; - - /* Builtin functions */ - double log(doublereal); - - /* Local variables */ - static integer ie, il, mm; - static doublereal eps, anrm, bnrm; - static integer itau, nlvl, iascl, ibscl; - static doublereal sfmin; - static integer minmn, maxmn, itaup, itauq, mnthr, nwork; - extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebrd_( - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, doublereal 
*, doublereal *, integer *, - integer *); - extern doublereal dlamch_(char *), dlange_(char *, integer *, - integer *, doublereal *, integer *, doublereal *); - extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, integer *), - dlalsd_(char *, integer *, integer *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, integer *), dlascl_(char *, - integer *, integer *, doublereal *, doublereal *, integer *, - integer *, doublereal *, integer *, integer *), dgeqrf_( - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, - doublereal *, doublereal *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static doublereal bignum; - extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, integer *); - static integer wlalsd; - extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *); - static integer ldwork; - extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *); - static integer minwrk, maxwrk; - static doublereal smlnum; - static logical lquery; - static integer smlsiz; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DGELSD computes the minimum-norm solution to a real linear least - squares problem: - minimize 2-norm(| b - A*x |) - using the singular value decomposition (SVD) of A. A is an M-by-N - matrix which may be rank-deficient. - - Several right hand side vectors b and solution vectors x can be - handled in a single call; they are stored as the columns of the - M-by-NRHS right hand side matrix B and the N-by-NRHS solution - matrix X. - - The problem is solved in three steps: - (1) Reduce the coefficient matrix A to bidiagonal form with - Householder transformations, reducing the original problem - into a "bidiagonal least squares problem" (BLS) - (2) Solve the BLS using a divide and conquer approach. - (3) Apply back all the Householder tranformations to solve - the original least squares problem. - - The effective rank of A is determined by treating as zero those - singular values which are less than RCOND times the largest singular - value. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - M (input) INTEGER - The number of rows of A. M >= 0. - - N (input) INTEGER - The number of columns of A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrices B and X. NRHS >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, A has been destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). 
- - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On entry, the M-by-NRHS right hand side matrix B. - On exit, B is overwritten by the N-by-NRHS solution - matrix X. If m >= n and RANK = n, the residual - sum-of-squares for the solution in the i-th column is given - by the sum of squares of elements n+1:m in that column. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,max(M,N)). - - S (output) DOUBLE PRECISION array, dimension (min(M,N)) - The singular values of A in decreasing order. - The condition number of A in the 2-norm = S(1)/S(min(m,n)). - - RCOND (input) DOUBLE PRECISION - RCOND is used to determine the effective rank of A. - Singular values S(i) <= RCOND*S(1) are treated as zero. - If RCOND < 0, machine precision is used instead. - - RANK (output) INTEGER - The effective rank of A, i.e., the number of singular values - which are greater than RCOND*S(1). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK must be at least 1. - The exact minimum amount of workspace needed depends on M, - N and NRHS. As long as LWORK is at least - 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, - if M is greater than or equal to N or - 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, - if M is less than N, the code will execute correctly. - SMLSIZ is returned by ILAENV and is equal to the maximum - size of the subproblems at the bottom of the computation - tree (usually about 25), and - NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) - For good performance, LWORK should generally be larger. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. 
- - IWORK (workspace) INTEGER array, dimension (LIWORK) - LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, - where MINMN = MIN( M,N ). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: the algorithm for computing the SVD failed to converge; - if INFO = i, i off-diagonal elements of an intermediate - bidiagonal form did not converge to zero. - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input arguments. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - --s; - --work; - --iwork; - - /* Function Body */ - *info = 0; - minmn = min(*m,*n); - maxmn = max(*m,*n); - mnthr = ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( - ftnlen)1); - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*ldb < max(1,maxmn)) { - *info = -7; - } - - smlsiz = ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - Compute workspace. - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - NB refers to the optimal block size for the immediately - following subroutine, as returned by ILAENV.) 
-*/ - - minwrk = 1; - minmn = max(1,minmn); -/* Computing MAX */ - i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) / - log(2.)) + 1; - nlvl = max(i__1,0); - - if (*info == 0) { - maxwrk = 0; - mm = *m; - if (*m >= *n && *m >= mnthr) { - -/* Path 1a - overdetermined, with many more rows than columns. */ - - mm = *n; -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "DORMQR", "LT", - m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); - maxwrk = max(i__1,i__2); - } - if (*m >= *n) { - -/* - Path 1 - overdetermined or exactly determined. - - Computing MAX -*/ - i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "DGEBRD" - , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "DORMBR", - "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "DORMBR", - "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing 2nd power */ - i__1 = smlsiz + 1; - wlalsd = *n * 9 + ((*n) << (1)) * smlsiz + ((*n) << (3)) * nlvl + - *n * *nrhs + i__1 * i__1; -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n * 3 + wlalsd; - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2), - i__2 = *n * 3 + wlalsd; - minwrk = max(i__1,i__2); - } - if (*n > *m) { -/* Computing 2nd power */ - i__1 = smlsiz + 1; - wlalsd = *m * 9 + ((*m) << (1)) * smlsiz + ((*m) << (3)) * nlvl + - *m * *nrhs + i__1 * i__1; - if (*n >= mnthr) { - -/* - Path 2a - underdetermined, with many more columns - than rows. 
-*/ - - maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, - &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + ((*m) << (1)) - * ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + *nrhs * - ilaenv_(&c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1, ( - ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + (*m - 1) * - ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1, ( - ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); - if (*nrhs > 1) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; - maxwrk = max(i__1,i__2); - } else { -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (1)); - maxwrk = max(i__1,i__2); - } -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "DORMLQ", - "LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + wlalsd; - maxwrk = max(i__1,i__2); - } else { - -/* Path 2 - remaining underdetermined cases. */ - - maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "DGEBRD", " ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "DORMBR" - , "QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR", - "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * 3 + wlalsd; - maxwrk = max(i__1,i__2); - } -/* Computing MAX */ - i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2), - i__2 = *m * 3 + wlalsd; - minwrk = max(i__1,i__2); - } - minwrk = min(minwrk,maxwrk); - work[1] = (doublereal) maxwrk; - if (*lwork < minwrk && ! 
lquery) { - *info = -12; - } - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGELSD", &i__1); - return 0; - } else if (lquery) { - goto L10; - } - -/* Quick return if possible. */ - - if ((*m == 0) || (*n == 0)) { - *rank = 0; - return 0; - } - -/* Get machine parameters. */ - - eps = PRECISION; - sfmin = SAFEMINIMUM; - smlnum = sfmin / eps; - bignum = 1. / smlnum; - dlabad_(&smlnum, &bignum); - -/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ - - anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]); - iascl = 0; - if (anrm > 0. && anrm < smlnum) { - -/* Scale matrix norm up to SMLNUM. */ - - dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, - info); - iascl = 1; - } else if (anrm > bignum) { - -/* Scale matrix norm down to BIGNUM. */ - - dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, - info); - iascl = 2; - } else if (anrm == 0.) { - -/* Matrix all zero. Return zero solution. */ - - i__1 = max(*m,*n); - dlaset_("F", &i__1, nrhs, &c_b2879, &c_b2879, &b[b_offset], ldb); - dlaset_("F", &minmn, &c__1, &c_b2879, &c_b2879, &s[1], &c__1); - *rank = 0; - goto L10; - } - -/* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ - - bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); - ibscl = 0; - if (bnrm > 0. && bnrm < smlnum) { - -/* Scale matrix norm up to SMLNUM. */ - - dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, - info); - ibscl = 1; - } else if (bnrm > bignum) { - -/* Scale matrix norm down to BIGNUM. */ - - dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, - info); - ibscl = 2; - } - -/* If M < N make sure certain entries of B are zero. */ - - if (*m < *n) { - i__1 = *n - *m; - dlaset_("F", &i__1, nrhs, &c_b2879, &c_b2879, &b[*m + 1 + b_dim1], - ldb); - } - -/* Overdetermined case. */ - - if (*m >= *n) { - -/* Path 1 - overdetermined or exactly determined. 
*/ - - mm = *m; - if (*m >= mnthr) { - -/* Path 1a - overdetermined, with many more rows than columns. */ - - mm = *n; - itau = 1; - nwork = itau + *n; - -/* - Compute A=Q*R. - (Workspace: need 2*N, prefer N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, - info); - -/* - Multiply B by transpose(Q). - (Workspace: need N+NRHS, prefer N+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ - b_offset], ldb, &work[nwork], &i__1, info); - -/* Zero out below R. */ - - if (*n > 1) { - i__1 = *n - 1; - i__2 = *n - 1; - dlaset_("L", &i__1, &i__2, &c_b2879, &c_b2879, &a[a_dim1 + 2], - lda); - } - } - - ie = 1; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A. - (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__1, info); - -/* - Multiply B by transpose of left bidiagonalizing vectors of R. - (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], - &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Solve the bidiagonal least squares problem. */ - - dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, - rcond, rank, &work[nwork], &iwork[1], info); - if (*info != 0) { - goto L10; - } - -/* Multiply B by right bidiagonalizing vectors of R. 
*/ - - i__1 = *lwork - nwork + 1; - dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & - b[b_offset], ldb, &work[nwork], &i__1, info); - - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = *m, i__2 = ((*m) << (1)) - 4, i__1 = max(i__1,i__2), i__1 = - max(i__1,*nrhs), i__2 = *n - *m * 3; - if (*n >= mnthr && *lwork >= ((*m) << (2)) + *m * *m + max(i__1,i__2)) - { - -/* - Path 2a - underdetermined, with many more columns than rows - and sufficient workspace for an efficient algorithm. -*/ - - ldwork = *m; -/* - Computing MAX - Computing MAX -*/ - i__3 = *m, i__4 = ((*m) << (1)) - 4, i__3 = max(i__3,i__4), i__3 = - max(i__3,*nrhs), i__4 = *n - *m * 3; - i__1 = ((*m) << (2)) + *m * *lda + max(i__3,i__4), i__2 = *m * * - lda + *m + *m * *nrhs; - if (*lwork >= max(i__1,i__2)) { - ldwork = *lda; - } - itau = 1; - nwork = *m + 1; - -/* - Compute A=L*Q. - (Workspace: need 2*M, prefer M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, - info); - il = nwork; - -/* Copy L to WORK(IL), zeroing out above its diagonal. */ - - dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); - i__1 = *m - 1; - i__2 = *m - 1; - dlaset_("U", &i__1, &i__2, &c_b2879, &c_b2879, &work[il + ldwork], - &ldwork); - ie = il + ldwork * *m; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IL). - (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], - &work[itaup], &work[nwork], &i__1, info); - -/* - Multiply B by transpose of left bidiagonalizing vectors of L. - (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ - itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Solve the bidiagonal least squares problem. 
*/ - - dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], - ldb, rcond, rank, &work[nwork], &iwork[1], info); - if (*info != 0) { - goto L10; - } - -/* Multiply B by right bidiagonalizing vectors of L. */ - - i__1 = *lwork - nwork + 1; - dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ - itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Zero out below first M rows of B. */ - - i__1 = *n - *m; - dlaset_("F", &i__1, nrhs, &c_b2879, &c_b2879, &b[*m + 1 + b_dim1], - ldb); - nwork = itau + *m; - -/* - Multiply transpose(Q) by B. - (Workspace: need M+NRHS, prefer M+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ - b_offset], ldb, &work[nwork], &i__1, info); - - } else { - -/* Path 2 - remaining underdetermined cases. */ - - ie = 1; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize A. - (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__1, info); - -/* - Multiply B by transpose of left bidiagonalizing vectors. - (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] - , &b[b_offset], ldb, &work[nwork], &i__1, info); - -/* Solve the bidiagonal least squares problem. */ - - dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], - ldb, rcond, rank, &work[nwork], &iwork[1], info); - if (*info != 0) { - goto L10; - } - -/* Multiply B by right bidiagonalizing vectors of A. */ - - i__1 = *lwork - nwork + 1; - dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] - , &b[b_offset], ldb, &work[nwork], &i__1, info); - - } - } - -/* Undo scaling. 
*/ - - if (iascl == 1) { - dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, - info); - dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & - minmn, info); - } else if (iascl == 2) { - dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, - info); - dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & - minmn, info); - } - if (ibscl == 1) { - dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, - info); - } else if (ibscl == 2) { - dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, - info); - } - -L10: - work[1] = (doublereal) maxwrk; - return 0; - -/* End of DGELSD */ - -} /* dgelsd_ */ - -/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *tau, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, k; - static doublereal aii; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DGEQR2 computes a QR factorization of a real m by n matrix A: - A = Q * R. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n matrix A. 
- On exit, the elements on and above the diagonal of the array - contain the min(m,n) by n upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) DOUBLE PRECISION array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(1) H(2) . . . H(k), where k = min(m,n). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). 
- - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEQR2", &i__1); - return 0; - } - - k = min(*m,*n); - - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ - - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] - , &c__1, &tau[i__]); - if (i__ < *n) { - -/* Apply H(i) to A(i:m,i+1:n) from the left */ - - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - i__2 = *m - i__ + 1; - i__3 = *n - i__; - dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = aii; - } -/* L10: */ - } - return 0; - -/* End of DGEQR2 */ - -} /* dgeqr2_ */ - -/* Subroutine */ int dgeqrf_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int dgeqr2_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, - char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer 
*, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DGEQRF computes a QR factorization of a real M-by-N matrix A: - A = Q * R. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, the elements on and above the diagonal of the array - contain the min(M,N)-by-N upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of min(m,n) elementary reflectors (see Further - Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The matrix Q is represented as a product of elementary reflectors - - Q = H(1) H(2) . . . H(k), where k = min(m,n). 
- - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *n * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEQRF", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - k = min(*m,*n); - if (k == 0) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *n; - if (nb > 1 && nb < k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < k && nx < k) { - -/* Use blocked code initially */ - - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); - -/* - Compute the QR factorization of the current block - A(i:m,i:i+ib-1) -*/ - - i__3 = *m - i__ + 1; - dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *n) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__3 = *m - i__ + 1; - dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H' to A(i:m,i+ib:n) from the left */ - - i__3 = *m - i__ + 1; - i__4 = *n - i__ - ib + 1; - dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & - i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib - + 1], &ldwork); - } -/* L10: */ - } - } else { - i__ = 1; - } - -/* Use unblocked code to factor the last or only block. */ - - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); - } - - work[1] = (doublereal) iws; - return 0; - -/* End of DGEQRF */ - -} /* dgeqrf_ */ - -/* Subroutine */ int dgesdd_(char *jobz, integer *m, integer *n, doublereal * - a, integer *lda, doublereal *s, doublereal *u, integer *ldu, - doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, - integer *iwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2, i__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, ie, il, ir, iu, blk; - static doublereal dum[1], eps; - static integer ivt, iscl; - static doublereal anrm; - static integer idum[1], ierr, itau; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer 
*); - extern logical lsame_(char *, char *); - static integer chunk, minmn, wrkbl, itaup, itauq, mnthr; - static logical wntqa; - static integer nwork; - static logical wntqn, wntqo, wntqs; - extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal - *, doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *, integer *), dgebrd_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *); - extern doublereal dlamch_(char *), dlange_(char *, integer *, - integer *, doublereal *, integer *, doublereal *); - static integer bdspac; - extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, integer *), - dlascl_(char *, integer *, integer *, doublereal *, doublereal *, - integer *, integer *, doublereal *, integer *, integer *), - dgeqrf_(integer *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *, integer *), dlacpy_(char *, - integer *, integer *, doublereal *, integer *, doublereal *, - integer *), dlaset_(char *, integer *, integer *, - doublereal *, doublereal *, doublereal *, integer *), - xerbla_(char *, integer *), dorgbr_(char *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static doublereal bignum; - extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, integer *), dorglq_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *), dorgqr_(integer *, integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, integer *); 
- static integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; - static doublereal smlnum; - static logical wntqas, lquery; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DGESDD computes the singular value decomposition (SVD) of a real - M-by-N matrix A, optionally computing the left and right singular - vectors. If singular vectors are desired, it uses a - divide-and-conquer algorithm. - - The SVD is written - - A = U * SIGMA * transpose(V) - - where SIGMA is an M-by-N matrix which is zero except for its - min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and - V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA - are the singular values of A; they are real and non-negative, and - are returned in descending order. The first min(m,n) columns of - U and V are the left and right singular vectors of A. - - Note that the routine returns VT = V**T, not V. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. 
- - Arguments - ========= - - JOBZ (input) CHARACTER*1 - Specifies options for computing all or part of the matrix U: - = 'A': all M columns of U and all N rows of V**T are - returned in the arrays U and VT; - = 'S': the first min(M,N) columns of U and the first - min(M,N) rows of V**T are returned in the arrays U - and VT; - = 'O': If M >= N, the first N columns of U are overwritten - on the array A and all rows of V**T are returned in - the array VT; - otherwise, all columns of U are returned in the - array U and the first M rows of V**T are overwritten - in the array VT; - = 'N': no columns of U or rows of V**T are computed. - - M (input) INTEGER - The number of rows of the input matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the input matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, - if JOBZ = 'O', A is overwritten with the first N columns - of U (the left singular vectors, stored - columnwise) if M >= N; - A is overwritten with the first M rows - of V**T (the right singular vectors, stored - rowwise) otherwise. - if JOBZ .ne. 'O', the contents of A are destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - S (output) DOUBLE PRECISION array, dimension (min(M,N)) - The singular values of A, sorted so that S(i) >= S(i+1). - - U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) - UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; - UCOL = min(M,N) if JOBZ = 'S'. - If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M - orthogonal matrix U; - if JOBZ = 'S', U contains the first min(M,N) columns of U - (the left singular vectors, stored columnwise); - if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= 1; if - JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. 
- - VT (output) DOUBLE PRECISION array, dimension (LDVT,N) - If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the - N-by-N orthogonal matrix V**T; - if JOBZ = 'S', VT contains the first min(M,N) rows of - V**T (the right singular vectors, stored rowwise); - if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= 1; if - JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; - if JOBZ = 'S', LDVT >= min(M,N). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK; - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= 1. - If JOBZ = 'N', - LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)). - If JOBZ = 'O', - LWORK >= 3*min(M,N)*min(M,N) + - max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). - If JOBZ = 'S' or 'A' - LWORK >= 3*min(M,N)*min(M,N) + - max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). - For good performance, LWORK should generally be larger. - If LWORK < 0 but other input arguments are legal, WORK(1) - returns the optimal LWORK. - - IWORK (workspace) INTEGER array, dimension (8*min(M,N)) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: DBDSDC did not converge, updating process failed. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --s; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --work; - --iwork; - - /* Function Body */ - *info = 0; - minmn = min(*m,*n); - mnthr = (integer) (minmn * 11. 
/ 6.); - wntqa = lsame_(jobz, "A"); - wntqs = lsame_(jobz, "S"); - wntqas = (wntqa) || (wntqs); - wntqo = lsame_(jobz, "O"); - wntqn = lsame_(jobz, "N"); - minwrk = 1; - maxwrk = 1; - lquery = *lwork == -1; - - if (! ((((wntqa) || (wntqs)) || (wntqo)) || (wntqn))) { - *info = -1; - } else if (*m < 0) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (((*ldu < 1) || (wntqas && *ldu < *m)) || (wntqo && *m < *n && * - ldu < *m)) { - *info = -8; - } else if ((((*ldvt < 1) || (wntqa && *ldvt < *n)) || (wntqs && *ldvt < - minmn)) || (wntqo && *m >= *n && *ldvt < *n)) { - *info = -10; - } - -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - NB refers to the optimal block size for the immediately - following subroutine, as returned by ILAENV.) -*/ - - if (*info == 0 && *m > 0 && *n > 0) { - if (*m >= *n) { - -/* Compute space needed for DBDSDC */ - - if (wntqn) { - bdspac = *n * 7; - } else { - bdspac = *n * 3 * *n + ((*n) << (2)); - } - if (*m >= mnthr) { - if (wntqn) { - -/* Path 1 (M much larger than N, JOBZ='N') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(& - c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n; - maxwrk = max(i__1,i__2); - minwrk = bdspac + *n; - } else if (wntqo) { - -/* Path 2 (M much larger than N, JOBZ='O') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", - " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 
3 + ((*n) << (1)) * ilaenv_(& - c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + ((*n) << (1)) * *n; - minwrk = bdspac + ((*n) << (1)) * *n + *n * 3; - } else if (wntqs) { - -/* Path 3 (M much larger than N, JOBZ='S') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", - " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(& - c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *n * *n; - minwrk = bdspac + *n * *n + *n * 3; - } else if (wntqa) { - -/* Path 4 (M much larger than N, JOBZ='A') */ - - wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "DORGQR", - " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(& - c__1, "DGEBRD", " 
", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *n * *n; - minwrk = bdspac + *n * *n + *n * 3; - } - } else { - -/* Path 5 (M at least N, but not much larger) */ - - wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", " ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - if (wntqn) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - maxwrk = max(i__1,i__2); - minwrk = *n * 3 + max(*m,bdspac); - } else if (wntqo) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *n; -/* Computing MAX */ - i__1 = *m, i__2 = *n * *n + bdspac; - minwrk = *n * 3 + max(i__1,i__2); - } else if (wntqs) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - maxwrk = max(i__1,i__2); - minwrk = *n * 3 + max(*m,bdspac); - } else if (wntqa) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, n, &c_n1, 
(ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = bdspac + *n * 3; - maxwrk = max(i__1,i__2); - minwrk = *n * 3 + max(*m,bdspac); - } - } - } else { - -/* Compute space needed for DBDSDC */ - - if (wntqn) { - bdspac = *m * 7; - } else { - bdspac = *m * 3 * *m + ((*m) << (2)); - } - if (*n >= mnthr) { - if (wntqn) { - -/* Path 1t (N much larger than M, JOBZ='N') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(& - c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m; - maxwrk = max(i__1,i__2); - minwrk = bdspac + *m; - } else if (wntqo) { - -/* Path 2t (N much larger than M, JOBZ='O') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(& - c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + ((*m) << (1)) * *m; - minwrk = bdspac + ((*m) << (1)) * *m + *m * 3; - } else if (wntqs) { - -/* Path 3t (N much larger than M, 
JOBZ='S') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(& - c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *m; - minwrk = bdspac + *m * *m + *m * 3; - } else if (wntqa) { - -/* Path 4t (N much larger than M, JOBZ='A') */ - - wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "DORGLQ", - " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(& - c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *m; - minwrk = bdspac + *m * *m + *m * 3; - } - } else { - -/* Path 5t (N greater than M, but not much larger) */ - - wrkbl = *m * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", 
" ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - if (wntqn) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - maxwrk = max(i__1,i__2); - minwrk = *m * 3 + max(*n,bdspac); - } else if (wntqo) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *n; -/* Computing MAX */ - i__1 = *n, i__2 = *m * *m + bdspac; - minwrk = *m * 3 + max(i__1,i__2); - } else if (wntqs) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - maxwrk = max(i__1,i__2); - minwrk = *m * 3 + max(*n,bdspac); - } else if (wntqa) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - maxwrk = max(i__1,i__2); - minwrk = *m * 3 + max(*n,bdspac); - } - } - } - work[1] = (doublereal) maxwrk; - } - - if (*lwork < minwrk && ! 
lquery) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGESDD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if ((*m == 0) || (*n == 0)) { - if (*lwork >= 1) { - work[1] = 1.; - } - return 0; - } - -/* Get machine constants */ - - eps = PRECISION; - smlnum = sqrt(SAFEMINIMUM) / eps; - bignum = 1. / smlnum; - -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ - - anrm = dlange_("M", m, n, &a[a_offset], lda, dum); - iscl = 0; - if (anrm > 0. && anrm < smlnum) { - iscl = 1; - dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & - ierr); - } else if (anrm > bignum) { - iscl = 1; - dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & - ierr); - } - - if (*m >= *n) { - -/* - A has at least as many rows as columns. If A has sufficiently - more rows than columns, first reduce using the QR - decomposition (if sufficient workspace available) -*/ - - if (*m >= mnthr) { - - if (wntqn) { - -/* - Path 1 (M much larger than N, JOBZ='N') - No singular vectors to be computed -*/ - - itau = 1; - nwork = itau + *n; - -/* - Compute A=Q*R - (Workspace: need 2*N, prefer N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Zero out below R */ - - i__1 = *n - 1; - i__2 = *n - 1; - dlaset_("L", &i__1, &i__2, &c_b2879, &c_b2879, &a[a_dim1 + 2], - lda); - ie = 1; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A - (Workspace: need 4*N, prefer 3*N+2*N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - nwork = ie + *n; - -/* - Perform bidiagonal SVD, computing singular values only - (Workspace: need N+BDSPAC) -*/ - - dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - - } else if (wntqo) { - -/* - Path 
2 (M much larger than N, JOBZ = 'O') - N left singular vectors to be overwritten on A and - N right singular vectors to be computed in VT -*/ - - ir = 1; - -/* WORK(IR) is LDWRKR by N */ - - if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { - ldwrkr = *lda; - } else { - ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; - } - itau = ir + ldwrkr * *n; - nwork = itau + *n; - -/* - Compute A=Q*R - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Copy R to WORK(IR), zeroing out below it */ - - dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__1 = *n - 1; - i__2 = *n - 1; - dlaset_("L", &i__1, &i__2, &c_b2879, &c_b2879, &work[ir + 1], - &ldwrkr); - -/* - Generate Q in A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = itau; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in VT, copying result to WORK(IR) - (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* WORK(IU) is N by N */ - - iu = nwork; - nwork = iu + *n * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in WORK(IU) and computing right - singular vectors of bidiagonal matrix in VT - (Workspace: need N+N*N+BDSPAC) -*/ - - dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite WORK(IU) by left singular vectors of R - and VT by right singular vectors of R - (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &work[iu], n, &work[nwork], &i__1, 
&ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - -/* - Multiply Q in A by left singular vectors of R in - WORK(IU), storing result in WORK(IR) and copying to A - (Workspace: need 2*N*N, prefer N*N+M*N) -*/ - - i__1 = *m; - i__2 = ldwrkr; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrkr); - dgemm_("N", "N", &chunk, n, n, &c_b2865, &a[i__ + a_dim1], - lda, &work[iu], n, &c_b2879, &work[ir], &ldwrkr); - dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + - a_dim1], lda); -/* L10: */ - } - - } else if (wntqs) { - -/* - Path 3 (M much larger than N, JOBZ='S') - N left singular vectors to be computed in U and - N right singular vectors to be computed in VT -*/ - - ir = 1; - -/* WORK(IR) is N by N */ - - ldwrkr = *n; - itau = ir + ldwrkr * *n; - nwork = itau + *n; - -/* - Compute A=Q*R - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Copy R to WORK(IR), zeroing out below it */ - - dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__2 = *n - 1; - i__1 = *n - 1; - dlaset_("L", &i__2, &i__1, &c_b2879, &c_b2879, &work[ir + 1], - &ldwrkr); - -/* - Generate Q in A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = itau; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in WORK(IR) - (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagoal matrix in U and computing right 
singular - vectors of bidiagonal matrix in VT - (Workspace: need N+BDSPAC) -*/ - - dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of R and VT - by right singular vectors of R - (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply Q in A by left singular vectors of R in - WORK(IR), storing result in U - (Workspace: need N*N) -*/ - - dlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); - dgemm_("N", "N", m, n, n, &c_b2865, &a[a_offset], lda, &work[ - ir], &ldwrkr, &c_b2879, &u[u_offset], ldu); - - } else if (wntqa) { - -/* - Path 4 (M much larger than N, JOBZ='A') - M left singular vectors to be computed in U and - N right singular vectors to be computed in VT -*/ - - iu = 1; - -/* WORK(IU) is N by N */ - - ldwrku = *n; - itau = iu + ldwrku * *n; - nwork = itau + *n; - -/* - Compute A=Q*R, copying result to U - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); - -/* - Generate Q in U - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - i__2 = *lwork - nwork + 1; - dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], - &i__2, &ierr); - -/* Produce R in A, zeroing out other entries */ - - i__2 = *n - 1; - i__1 = *n - 1; - dlaset_("L", &i__2, &i__1, &c_b2879, &c_b2879, &a[a_dim1 + 2], - lda); - ie = itau; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize R in A - (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) -*/ 
- - i__2 = *lwork - nwork + 1; - dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in WORK(IU) and computing right - singular vectors of bidiagonal matrix in VT - (Workspace: need N+N*N+BDSPAC) -*/ - - dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite WORK(IU) by left singular vectors of R and VT - by right singular vectors of R - (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & - ierr); - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply Q in U by left singular vectors of R in - WORK(IU), storing result in A - (Workspace: need N*N) -*/ - - dgemm_("N", "N", m, n, n, &c_b2865, &u[u_offset], ldu, &work[ - iu], &ldwrku, &c_b2879, &a[a_offset], lda); - -/* Copy left singular vectors of A from A to U */ - - dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); - - } - - } else { - -/* - M .LT. 
MNTHR - - Path 5 (M at least N, but not much larger) - Reduce to bidiagonal form without QR decomposition -*/ - - ie = 1; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; - -/* - Bidiagonalize A - (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { - -/* - Perform bidiagonal SVD, only computing singular values - (Workspace: need N+BDSPAC) -*/ - - dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - } else if (wntqo) { - iu = nwork; - if (*lwork >= *m * *n + *n * 3 + bdspac) { - -/* WORK( IU ) is M by N */ - - ldwrku = *m; - nwork = iu + ldwrku * *n; - dlaset_("F", m, n, &c_b2879, &c_b2879, &work[iu], &ldwrku); - } else { - -/* WORK( IU ) is N by N */ - - ldwrku = *n; - nwork = iu + ldwrku * *n; - -/* WORK(IR) is LDWRKR by N */ - - ir = nwork; - ldwrkr = (*lwork - *n * *n - *n * 3) / *n; - } - nwork = iu + ldwrku * *n; - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in WORK(IU) and computing right - singular vectors of bidiagonal matrix in VT - (Workspace: need N+N*N+BDSPAC) -*/ - - dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & - vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ - 1], info); - -/* - Overwrite VT by right singular vectors of A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - - if (*lwork >= *m * *n + *n * 3 + bdspac) { - -/* - Overwrite WORK(IU) by left singular vectors of A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & - ierr); - -/* Copy left singular vectors of A 
from WORK(IU) to A */ - - dlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); - } else { - -/* - Generate Q in A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - - i__2 = *lwork - nwork + 1; - dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & - work[nwork], &i__2, &ierr); - -/* - Multiply Q in A by left singular vectors of - bidiagonal matrix in WORK(IU), storing result in - WORK(IR) and copying to A - (Workspace: need 2*N*N, prefer N*N+M*N) -*/ - - i__2 = *m; - i__1 = ldwrkr; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrkr); - dgemm_("N", "N", &chunk, n, n, &c_b2865, &a[i__ + - a_dim1], lda, &work[iu], &ldwrku, &c_b2879, & - work[ir], &ldwrkr); - dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + - a_dim1], lda); -/* L20: */ - } - } - - } else if (wntqs) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need N+BDSPAC) -*/ - - dlaset_("F", m, n, &c_b2879, &c_b2879, &u[u_offset], ldu); - dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need 3*N, prefer 2*N+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } else if (wntqa) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need N+BDSPAC) -*/ - - dlaset_("F", m, m, &c_b2879, &c_b2879, &u[u_offset], ldu); - dbdsdc_("U", "I", 
n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* Set the right corner of U to identity matrix */ - - i__1 = *m - *n; - i__2 = *m - *n; - dlaset_("F", &i__1, &i__2, &c_b2879, &c_b2865, &u[*n + 1 + (* - n + 1) * u_dim1], ldu); - -/* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } - - } - - } else { - -/* - A has more columns than rows. If A has sufficiently more - columns than rows, first reduce using the LQ decomposition (if - sufficient workspace available) -*/ - - if (*n >= mnthr) { - - if (wntqn) { - -/* - Path 1t (N much larger than M, JOBZ='N') - No singular vectors to be computed -*/ - - itau = 1; - nwork = itau + *m; - -/* - Compute A=L*Q - (Workspace: need 2*M, prefer M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Zero out above L */ - - i__1 = *m - 1; - i__2 = *m - 1; - dlaset_("U", &i__1, &i__2, &c_b2879, &c_b2879, &a[((a_dim1) << - (1)) + 1], lda); - ie = 1; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in A - (Workspace: need 4*M, prefer 3*M+2*M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - nwork = ie + *m; - -/* - Perform bidiagonal SVD, computing singular values only - (Workspace: need M+BDSPAC) -*/ - - dbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - - } else if (wntqo) { - -/* - Path 2t (N much larger than M, 
JOBZ='O') - M right singular vectors to be overwritten on A and - M left singular vectors to be computed in U -*/ - - ivt = 1; - -/* IVT is M by M */ - - il = ivt + *m * *m; - if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { - -/* WORK(IL) is M by N */ - - ldwrkl = *m; - chunk = *n; - } else { - ldwrkl = *m; - chunk = (*lwork - *m * *m) / *m; - } - itau = il + ldwrkl * *m; - nwork = itau + *m; - -/* - Compute A=L*Q - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Copy L to WORK(IL), zeroing about above it */ - - dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); - i__1 = *m - 1; - i__2 = *m - 1; - dlaset_("U", &i__1, &i__2, &c_b2879, &c_b2879, &work[il + - ldwrkl], &ldwrkl); - -/* - Generate Q in A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = itau; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IL) - (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U, and computing right singular - vectors of bidiagonal matrix in WORK(IVT) - (Workspace: need M+M*M+BDSPAC) -*/ - - dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & - work[ivt], m, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of L and WORK(IVT) - by right singular vectors of L - (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", 
"T", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); - -/* - Multiply right singular vectors of L in WORK(IVT) by Q - in A, storing result in WORK(IL) and copying to A - (Workspace: need 2*M*M, prefer M*M+M*N) -*/ - - i__1 = *n; - i__2 = chunk; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - dgemm_("N", "N", m, &blk, m, &c_b2865, &work[ivt], m, &a[ - i__ * a_dim1 + 1], lda, &c_b2879, &work[il], & - ldwrkl); - dlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 - + 1], lda); -/* L30: */ - } - - } else if (wntqs) { - -/* - Path 3t (N much larger than M, JOBZ='S') - M right singular vectors to be computed in VT and - M left singular vectors to be computed in U -*/ - - il = 1; - -/* WORK(IL) is M by M */ - - ldwrkl = *m; - itau = il + ldwrkl * *m; - nwork = itau + *m; - -/* - Compute A=L*Q - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Copy L to WORK(IL), zeroing out above it */ - - dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); - i__2 = *m - 1; - i__1 = *m - 1; - dlaset_("U", &i__2, &i__1, &c_b2879, &c_b2879, &work[il + - ldwrkl], &ldwrkl); - -/* - Generate Q in A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = itau; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in WORK(IU), copying result to U - (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal 
matrix in VT - (Workspace: need M+BDSPAC) -*/ - - dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of L and VT - by right singular vectors of L - (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply right singular vectors of L in WORK(IL) by - Q in A, storing result in VT - (Workspace: need M*M) -*/ - - dlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); - dgemm_("N", "N", m, n, m, &c_b2865, &work[il], &ldwrkl, &a[ - a_offset], lda, &c_b2879, &vt[vt_offset], ldvt); - - } else if (wntqa) { - -/* - Path 4t (N much larger than M, JOBZ='A') - N right singular vectors to be computed in VT and - M left singular vectors to be computed in U -*/ - - ivt = 1; - -/* WORK(IVT) is M by M */ - - ldwkvt = *m; - itau = ivt + ldwkvt * *m; - nwork = itau + *m; - -/* - Compute A=L*Q, copying result to VT - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - -/* - Generate Q in VT - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ - nwork], &i__2, &ierr); - -/* Produce L in A, zeroing out other entries */ - - i__2 = *m - 1; - i__1 = *m - 1; - dlaset_("U", &i__2, &i__1, &c_b2879, &c_b2879, &a[((a_dim1) << - (1)) + 1], lda); - ie = itau; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize L in A - (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) -*/ 
- - i__2 = *lwork - nwork + 1; - dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in WORK(IVT) - (Workspace: need M+M*M+BDSPAC) -*/ - - dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & - work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] - , info); - -/* - Overwrite U by left singular vectors of L and WORK(IVT) - by right singular vectors of L - (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & - ierr); - -/* - Multiply right singular vectors of L in WORK(IVT) by - Q in VT, storing result in A - (Workspace: need M*M) -*/ - - dgemm_("N", "N", m, n, m, &c_b2865, &work[ivt], &ldwkvt, &vt[ - vt_offset], ldvt, &c_b2879, &a[a_offset], lda); - -/* Copy right singular vectors of A from A to VT */ - - dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - - } - - } else { - -/* - N .LT. 
MNTHR - - Path 5t (N greater than M, but not much larger) - Reduce to bidiagonal form without LQ decomposition -*/ - - ie = 1; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; - -/* - Bidiagonalize A - (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) -*/ - - i__2 = *lwork - nwork + 1; - dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { - -/* - Perform bidiagonal SVD, only computing singular values - (Workspace: need M+BDSPAC) -*/ - - dbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - } else if (wntqo) { - ldwkvt = *m; - ivt = nwork; - if (*lwork >= *m * *n + *m * 3 + bdspac) { - -/* WORK( IVT ) is M by N */ - - dlaset_("F", m, n, &c_b2879, &c_b2879, &work[ivt], & - ldwkvt); - nwork = ivt + ldwkvt * *n; - } else { - -/* WORK( IVT ) is M by M */ - - nwork = ivt + ldwkvt * *m; - il = nwork; - -/* WORK(IL) is M by CHUNK */ - - chunk = (*lwork - *m * *m - *m * 3) / *m; - } - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in WORK(IVT) - (Workspace: need M*M+BDSPAC) -*/ - - dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & - work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] - , info); - -/* - Overwrite U by left singular vectors of A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - - if (*lwork >= *m * *n + *m * 3 + bdspac) { - -/* - Overwrite WORK(IVT) by left singular vectors of A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, - &ierr); - -/* Copy right singular vectors of A from WORK(IVT) to A */ - - dlacpy_("F", m, 
n, &work[ivt], &ldwkvt, &a[a_offset], lda); - } else { - -/* - Generate P**T in A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ - - i__2 = *lwork - nwork + 1; - dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & - work[nwork], &i__2, &ierr); - -/* - Multiply Q in A by right singular vectors of - bidiagonal matrix in WORK(IVT), storing result in - WORK(IL) and copying to A - (Workspace: need 2*M*M, prefer M*M+M*N) -*/ - - i__2 = *n; - i__1 = chunk; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - dgemm_("N", "N", m, &blk, m, &c_b2865, &work[ivt], & - ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b2879, & - work[il], m); - dlacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + - 1], lda); -/* L40: */ - } - } - } else if (wntqs) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need M+BDSPAC) -*/ - - dlaset_("F", m, n, &c_b2879, &c_b2879, &vt[vt_offset], ldvt); - dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need 3*M, prefer 2*M+M*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } else if (wntqa) { - -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need M+BDSPAC) -*/ - - dlaset_("F", n, n, &c_b2879, &c_b2879, &vt[vt_offset], ldvt); - dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, 
&vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); - -/* Set the right corner of VT to identity matrix */ - - i__1 = *n - *m; - i__2 = *n - *m; - dlaset_("F", &i__1, &i__2, &c_b2879, &c_b2865, &vt[*m + 1 + (* - m + 1) * vt_dim1], ldvt); - -/* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need 2*M+N, prefer 2*M+N*NB) -*/ - - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } - - } - - } - -/* Undo scaling if necessary */ - - if (iscl == 1) { - if (anrm > bignum) { - dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); - } - if (anrm < smlnum) { - dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); - } - } - -/* Return optimal workspace in WORK(1) */ - - work[1] = (doublereal) maxwrk; - - return 0; - -/* End of DGESDD */ - -} /* dgesdd_ */ - -/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer - *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *), dgetrs_(char *, integer *, integer *, doublereal *, - integer *, integer *, doublereal *, integer *, integer *); - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DGESV computes the solution to a real system of linear equations - A * X = B, - where A is an N-by-N matrix and X and B are N-by-NRHS matrices. - - The LU decomposition with partial pivoting and row interchanges is - used to factor A as - A = P * L * U, - where P is a permutation matrix, L is unit lower triangular, and U is - upper triangular. The factored form of A is then used to solve the - system of equations A * X = B. - - Arguments - ========= - - N (input) INTEGER - The number of linear equations, i.e., the order of the - matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the N-by-N coefficient matrix A. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - IPIV (output) INTEGER array, dimension (N) - The pivot indices that define the permutation matrix P; - row i of the matrix was interchanged with row IPIV(i). - - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On entry, the N-by-NRHS matrix of right hand side matrix B. - On exit, if INFO = 0, the N-by-NRHS solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, so the solution could not be computed. - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -1; - } else if (*nrhs < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGESV ", &i__1); - return 0; - } - -/* Compute the LU factorization of A. */ - - dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); - if (*info == 0) { - -/* Solve the system A*X = B, overwriting B with X. */ - - dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ - b_offset], ldb, info); - } - return 0; - -/* End of DGESV */ - -} /* dgesv_ */ - -/* Subroutine */ int dgetf2_(integer *m, integer *n, doublereal *a, integer * - lda, integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1; - - /* Local variables */ - static integer j, jp; - extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *), dscal_(integer *, doublereal *, doublereal *, integer - *), dswap_(integer *, doublereal *, integer *, doublereal *, - integer *); - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1992 - - - Purpose - ======= - - DGETF2 computes an LU factorization of a general m-by-n matrix A - using partial pivoting with row interchanges. 
- - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). - - This is the right-looking Level 2 BLAS version of the algorithm. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n matrix to be factored. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, U(k,k) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGETF2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if ((*m == 0) || (*n == 0)) { - return 0; - } - - i__1 = min(*m,*n); - for (j = 1; j <= i__1; ++j) { - -/* Find pivot and test for singularity. */ - - i__2 = *m - j + 1; - jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1); - ipiv[j] = jp; - if (a[jp + j * a_dim1] != 0.) 
{ - -/* Apply the interchange to columns 1:N. */ - - if (jp != j) { - dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); - } - -/* Compute elements J+1:M of J-th column. */ - - if (j < *m) { - i__2 = *m - j; - d__1 = 1. / a[j + j * a_dim1]; - dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); - } - - } else if (*info == 0) { - - *info = j; - } - - if (j < min(*m,*n)) { - -/* Update trailing submatrix. */ - - i__2 = *m - j; - i__3 = *n - j; - dger_(&i__2, &i__3, &c_b3001, &a[j + 1 + j * a_dim1], &c__1, &a[j - + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], - lda); - } -/* L10: */ - } - return 0; - -/* End of DGETF2 */ - -} /* dgetf2_ */ - -/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer * - lda, integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - - /* Local variables */ - static integer i__, j, jb, nb; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer iinfo; - extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *), dgetf2_( - integer *, integer *, doublereal *, integer *, integer *, integer - *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, - integer *, integer *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DGETRF computes an LU factorization of a general M-by-N matrix A - using partial pivoting with row interchanges. 
- - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). - - This is the right-looking Level 3 BLAS version of the algorithm. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix to be factored. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGETRF", &i__1); - return 0; - } - -/* Quick return if possible */ - - if ((*m == 0) || (*n == 0)) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - if ((nb <= 1) || (nb >= min(*m,*n))) { - -/* Use unblocked code. 
*/ - - dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); - } else { - -/* Use blocked code. */ - - i__1 = min(*m,*n); - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { -/* Computing MIN */ - i__3 = min(*m,*n) - j + 1; - jb = min(i__3,nb); - -/* - Factor diagonal and subdiagonal blocks and test for exact - singularity. -*/ - - i__3 = *m - j + 1; - dgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); - -/* Adjust INFO and the pivot indices. */ - - if (*info == 0 && iinfo > 0) { - *info = iinfo + j - 1; - } -/* Computing MIN */ - i__4 = *m, i__5 = j + jb - 1; - i__3 = min(i__4,i__5); - for (i__ = j; i__ <= i__3; ++i__) { - ipiv[i__] = j - 1 + ipiv[i__]; -/* L10: */ - } - -/* Apply interchanges to columns 1:J-1. */ - - i__3 = j - 1; - i__4 = j + jb - 1; - dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); - - if (j + jb <= *n) { - -/* Apply interchanges to columns J+JB:N. */ - - i__3 = *n - j - jb + 1; - i__4 = j + jb - 1; - dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & - ipiv[1], &c__1); - -/* Compute block row of U. */ - - i__3 = *n - j - jb + 1; - dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & - c_b2865, &a[j + j * a_dim1], lda, &a[j + (j + jb) * - a_dim1], lda); - if (j + jb <= *m) { - -/* Update trailing submatrix. 
*/ - - i__3 = *m - j - jb + 1; - i__4 = *n - j - jb + 1; - dgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, - &c_b3001, &a[j + jb + j * a_dim1], lda, &a[j + (j - + jb) * a_dim1], lda, &c_b2865, &a[j + jb + (j + - jb) * a_dim1], lda); - } - } -/* L20: */ - } - } - return 0; - -/* End of DGETRF */ - -} /* dgetrf_ */ - -/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs, - doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * - ldb, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *), xerbla_( - char *, integer *), dlaswp_(integer *, doublereal *, - integer *, integer *, integer *, integer *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DGETRS solves a system of linear equations - A * X = B or A' * X = B - with a general N-by-N matrix A using the LU factorization computed - by DGETRF. - - Arguments - ========= - - TRANS (input) CHARACTER*1 - Specifies the form of the system of equations: - = 'N': A * X = B (No transpose) - = 'T': A'* X = B (Transpose) - = 'C': A'* X = B (Conjugate transpose = Transpose) - - N (input) INTEGER - The order of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The factors L and U from the factorization A = P*L*U - as computed by DGETRF. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). 
- - IPIV (input) INTEGER array, dimension (N) - The pivot indices from DGETRF; for 1<=i<=N, row i of the - matrix was interchanged with row IPIV(i). - - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - notran = lsame_(trans, "N"); - if (! notran && ! lsame_(trans, "T") && ! lsame_( - trans, "C")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGETRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if ((*n == 0) || (*nrhs == 0)) { - return 0; - } - - if (notran) { - -/* - Solve A * X = B. - - Apply row interchanges to the right hand sides. -*/ - - dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); - -/* Solve L*X = B, overwriting B with X. */ - - dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b2865, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. */ - - dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b2865, - &a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A' * X = B. - - Solve U'*X = B, overwriting B with X. -*/ - - dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b2865, & - a[a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. 
*/ - - dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b2865, &a[ - a_offset], lda, &b[b_offset], ldb); - -/* Apply row interchanges to the solution vectors. */ - - dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); - } - - return 0; - -/* End of DGETRS */ - -} /* dgetrs_ */ - -/* Subroutine */ int dhseqr_(char *job, char *compz, integer *n, integer *ilo, - integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, - doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, - integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4, - i__5; - doublereal d__1, d__2; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__, j, k, l; - static doublereal s[225] /* was [15][15] */, v[16]; - static integer i1, i2, ii, nh, nr, ns, nv; - static doublereal vv[16]; - static integer itn; - static doublereal tau; - static integer its; - static doublereal ulp, tst1; - static integer maxb; - static doublereal absw; - static integer ierr; - static doublereal unfl, temp, ovfl; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - static integer itemp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static logical initz, wantt, wantz; - extern doublereal dlapy2_(doublereal *, doublereal *); - extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); - - extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, - integer *, doublereal *); - extern integer idamax_(integer *, doublereal *, integer *); - extern doublereal 
dlanhs_(char *, integer *, doublereal *, integer *, - doublereal *); - extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, integer *, doublereal *, integer *, - integer *), dlacpy_(char *, integer *, integer *, doublereal *, - integer *, doublereal *, integer *), dlaset_(char *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *), dlarfx_( - char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *, doublereal *); - static doublereal smlnum; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DHSEQR computes the eigenvalues of a real upper Hessenberg matrix H - and, optionally, the matrices T and Z from the Schur decomposition - H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur - form), and Z is the orthogonal matrix of Schur vectors. - - Optionally Z may be postmultiplied into an input orthogonal matrix Q, - so that this routine can give the Schur factorization of a matrix A - which has been reduced to the Hessenberg form H by the orthogonal - matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. - - Arguments - ========= - - JOB (input) CHARACTER*1 - = 'E': compute eigenvalues only; - = 'S': compute eigenvalues and the Schur form T. - - COMPZ (input) CHARACTER*1 - = 'N': no Schur vectors are computed; - = 'I': Z is initialized to the unit matrix and the matrix Z - of Schur vectors of H is returned; - = 'V': Z must contain an orthogonal matrix Q on entry, and - the product Q*Z is returned. - - N (input) INTEGER - The order of the matrix H. N >= 0. 
- - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to DGEBAL, and then passed to SGEHRD - when the matrix output by DGEBAL is reduced to Hessenberg - form. Otherwise ILO and IHI should be set to 1 and N - respectively. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - H (input/output) DOUBLE PRECISION array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if JOB = 'S', H contains the upper quasi-triangular - matrix T from the Schur decomposition (the Schur form); - 2-by-2 diagonal blocks (corresponding to complex conjugate - pairs of eigenvalues) are returned in standard form, with - H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E', - the contents of H are unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - WR (output) DOUBLE PRECISION array, dimension (N) - WI (output) DOUBLE PRECISION array, dimension (N) - The real and imaginary parts, respectively, of the computed - eigenvalues. If two eigenvalues are computed as a complex - conjugate pair, they are stored in consecutive elements of - WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and - WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the - same order as on the diagonal of the Schur form returned in - H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 - diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and - WI(i+1) = -WI(i). - - Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) - If COMPZ = 'N': Z is not referenced. - If COMPZ = 'I': on entry, Z need not be set, and on exit, Z - contains the orthogonal matrix Z of the Schur vectors of H. - If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, - which is assumed to be equal to the unit matrix except for - the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. 
- Normally Q is the orthogonal matrix generated by DORGHR after - the call to DGEHRD which formed the Hessenberg matrix H. - - LDZ (input) INTEGER - The leading dimension of the array Z. - LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, DHSEQR failed to compute all of the - eigenvalues in a total of 30*(IHI-ILO+1) iterations; - elements 1:ilo-1 and i+1:n of WR and WI contain those - eigenvalues which have been successfully computed. - - ===================================================================== - - - Decode and test the input parameters -*/ - - /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --wr; - --wi; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - - /* Function Body */ - wantt = lsame_(job, "S"); - initz = lsame_(compz, "I"); - wantz = (initz) || (lsame_(compz, "V")); - - *info = 0; - work[1] = (doublereal) max(1,*n); - lquery = *lwork == -1; - if (! lsame_(job, "E") && ! wantt) { - *info = -1; - } else if (! lsame_(compz, "N") && ! wantz) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { - *info = -4; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { - *info = -5; - } else if (*ldh < max(1,*n)) { - *info = -7; - } else if ((*ldz < 1) || (wantz && *ldz < max(1,*n))) { - *info = -11; - } else if (*lwork < max(1,*n) && ! 
lquery) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DHSEQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Initialize Z, if necessary */ - - if (initz) { - dlaset_("Full", n, n, &c_b2879, &c_b2865, &z__[z_offset], ldz); - } - -/* Store the eigenvalues isolated by DGEBAL. */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.; -/* L10: */ - } - i__1 = *n; - for (i__ = *ihi + 1; i__ <= i__1; ++i__) { - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.; -/* L20: */ - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - wr[*ilo] = h__[*ilo + *ilo * h_dim1]; - wi[*ilo] = 0.; - return 0; - } - -/* - Set rows and columns ILO to IHI to zero below the first - subdiagonal. -*/ - - i__1 = *ihi - 2; - for (j = *ilo; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j + 2; i__ <= i__2; ++i__) { - h__[i__ + j * h_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } - nh = *ihi - *ilo + 1; - -/* - Determine the order of the multi-shift QR algorithm to be used. - - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = job; - i__3[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - ns = ilaenv_(&c__4, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = job; - i__3[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - maxb = ilaenv_(&c__8, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (((ns <= 2) || (ns > nh)) || (maxb >= nh)) { - -/* Use the standard double-shift algorithm */ - - dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ - 1], ilo, ihi, &z__[z_offset], ldz, info); - return 0; - } - maxb = max(3,maxb); -/* Computing MIN */ - i__1 = min(ns,maxb); - ns = min(i__1,15); - -/* - Now 2 < NS <= MAXB < NH. - - Set machine-dependent constants for the stopping criterion. 
- If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (nh / ulp); - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. -*/ - - if (wantt) { - i1 = 1; - i2 = *n; - } - -/* ITN is the total number of multiple-shift QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of at most MAXB. Each iteration of the loop - works with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO or - H(L,L-1) is negligible so that the matrix splits. -*/ - - i__ = *ihi; -L50: - l = *ilo; - if (i__ < *ilo) { - goto L170; - } - -/* - Perform multiple-shift QR iterations on rows and columns ILO to I - until a submatrix of order at most MAXB splits off at the bottom - because a subdiagonal element has become negligible. -*/ - - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. */ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = - h__[k + k * h_dim1], abs(d__2)); - if (tst1 == 0.) { - i__4 = i__ - l + 1; - tst1 = dlanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1] - ); - } -/* Computing MAX */ - d__2 = ulp * tst1; - if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, - smlnum)) { - goto L70; - } -/* L60: */ - } -L70: - l = k; - if (l > *ilo) { - -/* H(L,L-1) is negligible. */ - - h__[l + (l - 1) * h_dim1] = 0.; - } - -/* Exit from loop if a submatrix of order <= MAXB has split off. */ - - if (l >= i__ - maxb + 1) { - goto L160; - } - -/* - Now the active submatrix is in rows and columns L to I. 
If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! wantt) { - i1 = l; - i2 = i__; - } - - if ((its == 20) || (its == 30)) { - -/* Exceptional shifts. */ - - i__2 = i__; - for (ii = i__ - ns + 1; ii <= i__2; ++ii) { - wr[ii] = ((d__1 = h__[ii + (ii - 1) * h_dim1], abs(d__1)) + ( - d__2 = h__[ii + ii * h_dim1], abs(d__2))) * 1.5; - wi[ii] = 0.; -/* L80: */ - } - } else { - -/* Use eigenvalues of trailing submatrix of order NS as shifts. */ - - dlacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * - h_dim1], ldh, s, &c__15); - dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ - - ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset], - ldz, &ierr); - if (ierr > 0) { - -/* - If DLAHQR failed to compute all NS eigenvalues, use the - unconverged diagonal elements as the remaining shifts. -*/ - - i__2 = ierr; - for (ii = 1; ii <= i__2; ++ii) { - wr[i__ - ns + ii] = s[ii + ii * 15 - 16]; - wi[i__ - ns + ii] = 0.; -/* L90: */ - } - } - } - -/* - Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) - where G is the Hessenberg submatrix H(L:I,L:I) and w is - the vector of shifts (stored in WR and WI). The result is - stored in the local array V. -*/ - - v[0] = 1.; - i__2 = ns + 1; - for (ii = 2; ii <= i__2; ++ii) { - v[ii - 1] = 0.; -/* L100: */ - } - nv = 1; - i__2 = i__; - for (j = i__ - ns + 1; j <= i__2; ++j) { - if (wi[j] >= 0.) { - if (wi[j] == 0.) { - -/* real shift */ - - i__4 = nv + 1; - dcopy_(&i__4, v, &c__1, vv, &c__1); - i__4 = nv + 1; - d__1 = -wr[j]; - dgemv_("No transpose", &i__4, &nv, &c_b2865, &h__[l + l * - h_dim1], ldh, vv, &c__1, &d__1, v, &c__1); - ++nv; - } else if (wi[j] > 0.) 
{ - -/* complex conjugate pair of shifts */ - - i__4 = nv + 1; - dcopy_(&i__4, v, &c__1, vv, &c__1); - i__4 = nv + 1; - d__1 = wr[j] * -2.; - dgemv_("No transpose", &i__4, &nv, &c_b2865, &h__[l + l * - h_dim1], ldh, v, &c__1, &d__1, vv, &c__1); - i__4 = nv + 1; - itemp = idamax_(&i__4, vv, &c__1); -/* Computing MAX */ - d__2 = (d__1 = vv[itemp - 1], abs(d__1)); - temp = 1. / max(d__2,smlnum); - i__4 = nv + 1; - dscal_(&i__4, &temp, vv, &c__1); - absw = dlapy2_(&wr[j], &wi[j]); - temp = temp * absw * absw; - i__4 = nv + 2; - i__5 = nv + 1; - dgemv_("No transpose", &i__4, &i__5, &c_b2865, &h__[l + l - * h_dim1], ldh, vv, &c__1, &temp, v, &c__1); - nv += 2; - } - -/* - Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, - reset it to the unit vector. -*/ - - itemp = idamax_(&nv, v, &c__1); - temp = (d__1 = v[itemp - 1], abs(d__1)); - if (temp == 0.) { - v[0] = 1.; - i__4 = nv; - for (ii = 2; ii <= i__4; ++ii) { - v[ii - 1] = 0.; -/* L110: */ - } - } else { - temp = max(temp,smlnum); - d__1 = 1. / temp; - dscal_(&nv, &d__1, v, &c__1); - } - } -/* L120: */ - } - -/* Multiple-shift QR step */ - - i__2 = i__ - 1; - for (k = l; k <= i__2; ++k) { - -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. 
- - Computing MIN -*/ - i__4 = ns + 1, i__5 = i__ - k + 1; - nr = min(i__4,i__5); - if (k > l) { - dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - dlarfg_(&nr, v, &v[1], &c__1, &tau); - if (k > l) { - h__[k + (k - 1) * h_dim1] = v[0]; - i__4 = i__; - for (ii = k + 1; ii <= i__4; ++ii) { - h__[ii + (k - 1) * h_dim1] = 0.; -/* L130: */ - } - } - v[0] = 1.; - -/* - Apply G from the left to transform the rows of the matrix in - columns K to I2. -*/ - - i__4 = i2 - k + 1; - dlarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, & - work[1]); - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+NR,I). - - Computing MIN -*/ - i__5 = k + nr; - i__4 = min(i__5,i__) - i1 + 1; - dlarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, - &work[1]); - - if (wantz) { - -/* Accumulate transformations in the matrix Z */ - - dlarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], - ldz, &work[1]); - } -/* L140: */ - } - -/* L150: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L160: - -/* - A submatrix of order <= MAXB in rows and columns L to I has split - off. Use the double-shift QR algorithm to handle it. -*/ - - dlahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1], - ilo, ihi, &z__[z_offset], ldz, info); - if (*info > 0) { - return 0; - } - -/* - Decrement number of remaining iterations, and return to start of - the main loop with a new value of I. -*/ - - itn -= its; - i__ = l - 1; - goto L50; - -L170: - work[1] = (doublereal) max(1,*n); - return 0; - -/* End of DHSEQR */ - -} /* dhseqr_ */ - -/* Subroutine */ int dlabad_(doublereal *small, doublereal *large) -{ - /* Builtin functions */ - double d_lg10(doublereal *), sqrt(doublereal); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLABAD takes as input the values computed by DLAMCH for underflow and - overflow, and returns the square root of each of these values if the - log of LARGE is sufficiently large. This subroutine is intended to - identify machines with a large exponent range, such as the Crays, and - redefine the underflow and overflow limits to be the square roots of - the values computed by DLAMCH. This subroutine is needed because - DLAMCH does not compensate for poor arithmetic in the upper half of - the exponent range, as is found on a Cray. - - Arguments - ========= - - SMALL (input/output) DOUBLE PRECISION - On entry, the underflow threshold as computed by DLAMCH. - On exit, if LOG10(LARGE) is sufficiently large, the square - root of SMALL, otherwise unchanged. - - LARGE (input/output) DOUBLE PRECISION - On entry, the overflow threshold as computed by DLAMCH. - On exit, if LOG10(LARGE) is sufficiently large, the square - root of LARGE, otherwise unchanged. - - ===================================================================== - - - If it looks like we're on a Cray, take the square root of - SMALL and LARGE to avoid overflow and underflow problems. 
-*/ - - if (d_lg10(large) > 2e3) { - *small = sqrt(*small); - *large = sqrt(*large); - } - - return 0; - -/* End of DLABAD */ - -} /* dlabad_ */ - -/* Subroutine */ int dlabrd_(integer *m, integer *n, integer *nb, doublereal * - a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, - doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer - *ldy) -{ - /* System generated locals */ - integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, - i__3; - - /* Local variables */ - static integer i__; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dgemv_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLABRD reduces the first NB rows and columns of a real general - m by n matrix A to upper or lower bidiagonal form by an orthogonal - transformation Q' * A * P, and returns the matrices X and Y which - are needed to apply the transformation to the unreduced part of A. - - If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower - bidiagonal form. - - This is an auxiliary routine called by DGEBRD - - Arguments - ========= - - M (input) INTEGER - The number of rows in the matrix A. - - N (input) INTEGER - The number of columns in the matrix A. - - NB (input) INTEGER - The number of leading rows and columns of A to be reduced. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. - On exit, the first NB rows and columns of the matrix are - overwritten; the rest of the array is unchanged. 
- If m >= n, elements on and below the diagonal in the first NB - columns, with the array TAUQ, represent the orthogonal - matrix Q as a product of elementary reflectors; and - elements above the diagonal in the first NB rows, with the - array TAUP, represent the orthogonal matrix P as a product - of elementary reflectors. - If m < n, elements below the diagonal in the first NB - columns, with the array TAUQ, represent the orthogonal - matrix Q as a product of elementary reflectors, and - elements on and above the diagonal in the first NB rows, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) DOUBLE PRECISION array, dimension (NB) - The diagonal elements of the first NB rows and columns of - the reduced matrix. D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (NB) - The off-diagonal elements of the first NB rows and columns of - the reduced matrix. - - TAUQ (output) DOUBLE PRECISION array dimension (NB) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix Q. See Further Details. - - TAUP (output) DOUBLE PRECISION array, dimension (NB) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix P. See Further Details. - - X (output) DOUBLE PRECISION array, dimension (LDX,NB) - The m-by-nb matrix X required to update the unreduced part - of A. - - LDX (input) INTEGER - The leading dimension of the array X. LDX >= M. - - Y (output) DOUBLE PRECISION array, dimension (LDY,NB) - The n-by-nb matrix Y required to update the unreduced part - of A. - - LDY (output) INTEGER - The leading dimension of the array Y. LDY >= N. - - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . 
G(nb) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors. - - If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in - A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in - A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - - The elements of the vectors v and u together form the m-by-nb matrix - V and the nb-by-n matrix U' which are needed, with X and Y, to apply - the transformation to the unreduced part of the matrix, using a block - update of the form: A := A - V*Y' - X*U'. - - The contents of A on exit are illustrated by the following examples - with nb = 2: - - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): - - ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) - ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) - ( v1 v2 a a a ) ( v1 1 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) - - where a denotes an element of the original matrix which is unchanged, - vi denotes an element of the vector defining H(i), and ui an element - of the vector defining G(i). 
- - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tauq; - --taup; - x_dim1 = *ldx; - x_offset = 1 + x_dim1; - x -= x_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; - - /* Function Body */ - if ((*m <= 0) || (*n <= 0)) { - return 0; - } - - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i:m,i) */ - - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[i__ + a_dim1], - lda, &y[i__ + y_dim1], ldy, &c_b2865, &a[i__ + i__ * - a_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &x[i__ + x_dim1], - ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b2865, &a[i__ + i__ * - a_dim1], &c__1); - -/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ - - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * - a_dim1], &c__1, &tauq[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - if (i__ < *n) { - a[i__ + i__ * a_dim1] = 1.; - -/* Compute Y(i+1:n,i) */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[i__ + (i__ + 1) - * a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, & - c_b2879, &y[i__ + 1 + i__ * y_dim1], &c__1) - ; - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[i__ + a_dim1], - lda, &a[i__ + i__ * a_dim1], &c__1, &c_b2879, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b2865, & - y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2865, &x[i__ + x_dim1], - ldx, &a[i__ + i__ * a_dim1], &c__1, 
&c_b2879, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b3001, &a[(i__ + 1) * - a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b2865, &y[i__ + 1 + i__ * y_dim1], &c__1) - ; - i__2 = *n - i__; - dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - -/* Update A(i,i+1:n) */ - - i__2 = *n - i__; - dgemv_("No transpose", &i__2, &i__, &c_b3001, &y[i__ + 1 + - y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b2865, &a[i__ - + (i__ + 1) * a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b3001, &a[(i__ + 1) * - a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b2865, &a[ - i__ + (i__ + 1) * a_dim1], lda); - -/* Generate reflection P(i) to annihilate A(i,i+2:n) */ - - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( - i__3,*n) * a_dim1], lda, &taup[i__]); - e[i__] = a[i__ + (i__ + 1) * a_dim1]; - a[i__ + (i__ + 1) * a_dim1] = 1.; - -/* Compute X(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, &c_b2865, &a[i__ + 1 + ( - i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], - lda, &c_b2879, &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__; - dgemv_("Transpose", &i__2, &i__, &c_b2865, &y[i__ + 1 + - y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b2879, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - dgemv_("No transpose", &i__2, &i__, &c_b3001, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b2865, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, &c_b2865, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b2879, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b2865, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - 
i__; - dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); - } -/* L10: */ - } - } else { - -/* Reduce to lower bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i,i:n) */ - - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &y[i__ + y_dim1], - ldy, &a[i__ + a_dim1], lda, &c_b2865, &a[i__ + i__ * - a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - dgemv_("Transpose", &i__2, &i__3, &c_b3001, &a[i__ * a_dim1 + 1], - lda, &x[i__ + x_dim1], ldx, &c_b2865, &a[i__ + i__ * - a_dim1], lda); - -/* Generate reflection P(i) to annihilate A(i,i+1:n) */ - - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * - a_dim1], lda, &taup[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - if (i__ < *m) { - a[i__ + i__ * a_dim1] = 1.; - -/* Compute X(i+1:m,i) */ - - i__2 = *m - i__; - i__3 = *n - i__ + 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2865, &a[i__ + 1 + - i__ * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, & - c_b2879, &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2865, &y[i__ + y_dim1], - ldy, &a[i__ + i__ * a_dim1], lda, &c_b2879, &x[i__ * - x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b2865, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - dgemv_("No transpose", &i__2, &i__3, &c_b2865, &a[i__ * - a_dim1 + 1], lda, &a[i__ + i__ * a_dim1], lda, & - c_b2879, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b2865, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - i__; - dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); - -/* Update A(i+1:m,i) */ - - i__2 = 
*m - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[i__ + 1 + - a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b2865, &a[i__ - + 1 + i__ * a_dim1], &c__1); - i__2 = *m - i__; - dgemv_("No transpose", &i__2, &i__, &c_b3001, &x[i__ + 1 + - x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b2865, & - a[i__ + 1 + i__ * a_dim1], &c__1); - -/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ - - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + - i__ * a_dim1], &c__1, &tauq[i__]); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.; - -/* Compute Y(i+1:n,i) */ - - i__2 = *m - i__; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[i__ + 1 + (i__ - + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], & - c__1, &c_b2879, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[i__ + 1 + - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b2879, &y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b2865, & - y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__; - dgemv_("Transpose", &i__2, &i__, &c_b2865, &x[i__ + 1 + - x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b2879, &y[i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - dgemv_("Transpose", &i__, &i__2, &c_b3001, &a[(i__ + 1) * - a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b2865, &y[i__ + 1 + i__ * y_dim1], &c__1) - ; - i__2 = *n - i__; - dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - } -/* L20: */ - } - } - return 0; - -/* End of DLABRD */ - -} /* dlabrd_ */ - -/* Subroutine */ int dlacpy_(char *uplo, integer *m, integer *n, doublereal * - a, integer *lda, doublereal *b, integer *ldb) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; - 
- /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLACPY copies all or part of a two-dimensional matrix A to another - matrix B. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be copied to B. - = 'U': Upper triangular part - = 'L': Lower triangular part - Otherwise: All of the matrix A - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The m by n matrix A. If UPLO = 'U', only the upper triangle - or trapezoid is accessed; if UPLO = 'L', only the lower - triangle or trapezoid is accessed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - B (output) DOUBLE PRECISION array, dimension (LDB,N) - On exit, B = A in the locations specified by UPLO. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,M). 
- - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(uplo, "L")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; -/* L30: */ - } -/* L40: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; -/* L50: */ - } -/* L60: */ - } - } - return 0; - -/* End of DLACPY */ - -} /* dlacpy_ */ - -/* Subroutine */ int dladiv_(doublereal *a, doublereal *b, doublereal *c__, - doublereal *d__, doublereal *p, doublereal *q) -{ - static doublereal e, f; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLADIV performs complex division in real arithmetic - - a + i*b - p + i*q = --------- - c + i*d - - The algorithm is due to Robert L. Smith and can be found - in D. Knuth, The art of Computer Programming, Vol.2, p.195 - - Arguments - ========= - - A (input) DOUBLE PRECISION - B (input) DOUBLE PRECISION - C (input) DOUBLE PRECISION - D (input) DOUBLE PRECISION - The scalars a, b, c, and d in the above expression. - - P (output) DOUBLE PRECISION - Q (output) DOUBLE PRECISION - The scalars p and q in the above expression. 
- - ===================================================================== -*/ - - - if (abs(*d__) < abs(*c__)) { - e = *d__ / *c__; - f = *c__ + *d__ * e; - *p = (*a + *b * e) / f; - *q = (*b - *a * e) / f; - } else { - e = *c__ / *d__; - f = *d__ + *c__ * e; - *p = (*b + *a * e) / f; - *q = (-(*a) + *b * e) / f; - } - - return 0; - -/* End of DLADIV */ - -} /* dladiv_ */ - -/* Subroutine */ int dlae2_(doublereal *a, doublereal *b, doublereal *c__, - doublereal *rt1, doublereal *rt2) -{ - /* System generated locals */ - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal ab, df, tb, sm, rt, adf, acmn, acmx; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix - [ A B ] - [ B C ]. - On return, RT1 is the eigenvalue of larger absolute value, and RT2 - is the eigenvalue of smaller absolute value. - - Arguments - ========= - - A (input) DOUBLE PRECISION - The (1,1) element of the 2-by-2 matrix. - - B (input) DOUBLE PRECISION - The (1,2) and (2,1) elements of the 2-by-2 matrix. - - C (input) DOUBLE PRECISION - The (2,2) element of the 2-by-2 matrix. - - RT1 (output) DOUBLE PRECISION - The eigenvalue of larger absolute value. - - RT2 (output) DOUBLE PRECISION - The eigenvalue of smaller absolute value. - - Further Details - =============== - - RT1 is accurate to a few ulps barring over/underflow. - - RT2 may be inaccurate if there is massive cancellation in the - determinant A*C-B*B; higher precision or correctly rounded or - correctly truncated arithmetic would be needed to compute RT2 - accurately in all cases. - - Overflow is possible only if RT1 is within a factor of 5 of overflow. - Underflow is harmless if the input data is 0 or exceeds - underflow_threshold / macheps. 
- - ===================================================================== - - - Compute the eigenvalues -*/ - - sm = *a + *c__; - df = *a - *c__; - adf = abs(df); - tb = *b + *b; - ab = abs(tb); - if (abs(*a) > abs(*c__)) { - acmx = *a; - acmn = *c__; - } else { - acmx = *c__; - acmn = *a; - } - if (adf > ab) { -/* Computing 2nd power */ - d__1 = ab / adf; - rt = adf * sqrt(d__1 * d__1 + 1.); - } else if (adf < ab) { -/* Computing 2nd power */ - d__1 = adf / ab; - rt = ab * sqrt(d__1 * d__1 + 1.); - } else { - -/* Includes case AB=ADF=0 */ - - rt = ab * sqrt(2.); - } - if (sm < 0.) { - *rt1 = (sm - rt) * .5; - -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else if (sm > 0.) { - *rt1 = (sm + rt) * .5; - -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else { - -/* Includes case RT1 = RT2 = 0 */ - - *rt1 = rt * .5; - *rt2 = rt * -.5; - } - return 0; - -/* End of DLAE2 */ - -} /* dlae2_ */ - -/* Subroutine */ int dlaed0_(integer *icompq, integer *qsiz, integer *n, - doublereal *d__, doublereal *e, doublereal *q, integer *ldq, - doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, - integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; - static doublereal temp; - static integer curr; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer iperm; - extern /* 
Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer indxq, iwrem; - extern /* Subroutine */ int dlaed1_(integer *, doublereal *, doublereal *, - integer *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *); - static integer iqptr; - extern /* Subroutine */ int dlaed7_(integer *, integer *, integer *, - integer *, integer *, integer *, doublereal *, doublereal *, - integer *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, integer *, integer *, doublereal - *, doublereal *, integer *, integer *); - static integer tlvls; - extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *); - static integer igivcl; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer igivnm, submat, curprb, subpbs, igivpt; - extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *); - static integer curlvl, matsiz, iprmpt, smlsiz; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLAED0 computes all eigenvalues and corresponding eigenvectors of a - symmetric tridiagonal matrix using the divide and conquer method. - - Arguments - ========= - - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - = 2: Compute eigenvalues and eigenvectors of tridiagonal - matrix. - - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. 
QSIZ >= N if ICOMPQ = 1. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the main diagonal of the tridiagonal matrix. - On exit, its eigenvalues. - - E (input) DOUBLE PRECISION array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. - - Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) - On entry, Q must contain an N-by-N orthogonal matrix. - If ICOMPQ = 0 Q is not referenced. - If ICOMPQ = 1 On entry, Q is a subset of the columns of the - orthogonal matrix used to reduce the full - matrix to tridiagonal form corresponding to - the subset of the full matrix which is being - decomposed at this time. - If ICOMPQ = 2 On entry, Q will be the identity matrix. - On exit, Q contains the eigenvectors of the - tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. If eigenvectors are - desired, then LDQ >= max(1,N). In any case, LDQ >= 1. - - QSTORE (workspace) DOUBLE PRECISION array, dimension (LDQS, N) - Referenced only when ICOMPQ = 1. Used to store parts of - the eigenvector matrix when the updating matrix multiplies - take place. - - LDQS (input) INTEGER - The leading dimension of the array QSTORE. If ICOMPQ = 1, - then LDQS >= max(1,N). In any case, LDQS >= 1. - - WORK (workspace) DOUBLE PRECISION array, - If ICOMPQ = 0 or 1, the dimension of WORK must be at least - 1 + 3*N + 2*N*lg N + 2*N**2 - ( lg( N ) = smallest integer k - such that 2^k >= N ) - If ICOMPQ = 2, the dimension of WORK must be at least - 4*N + N**2. - - IWORK (workspace) INTEGER array, - If ICOMPQ = 0 or 1, the dimension of IWORK must be at least - 6 + 6*N + 5*N*lg N. - ( lg( N ) = smallest integer k - such that 2^k >= N ) - If ICOMPQ = 2, the dimension of IWORK must be at least - 3 + 5*N. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. 
- > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - qstore_dim1 = *ldqs; - qstore_offset = 1 + qstore_dim1; - qstore -= qstore_offset; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if ((*icompq < 0) || (*icompq > 2)) { - *info = -1; - } else if (*icompq == 1 && *qsiz < max(0,*n)) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ldq < max(1,*n)) { - *info = -7; - } else if (*ldqs < max(1,*n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED0", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - smlsiz = ilaenv_(&c__9, "DLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - Determine the size and placement of the submatrices, and save in - the leading elements of IWORK. -*/ - - iwork[1] = *n; - subpbs = 1; - tlvls = 0; -L10: - if (iwork[subpbs] > smlsiz) { - for (j = subpbs; j >= 1; --j) { - iwork[j * 2] = (iwork[j] + 1) / 2; - iwork[((j) << (1)) - 1] = iwork[j] / 2; -/* L20: */ - } - ++tlvls; - subpbs <<= 1; - goto L10; - } - i__1 = subpbs; - for (j = 2; j <= i__1; ++j) { - iwork[j] += iwork[j - 1]; -/* L30: */ - } - -/* - Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 - using rank-1 modifications (cuts). 
-*/ - - spm1 = subpbs - 1; - i__1 = spm1; - for (i__ = 1; i__ <= i__1; ++i__) { - submat = iwork[i__] + 1; - smm1 = submat - 1; - d__[smm1] -= (d__1 = e[smm1], abs(d__1)); - d__[submat] -= (d__1 = e[smm1], abs(d__1)); -/* L40: */ - } - - indxq = ((*n) << (2)) + 3; - if (*icompq != 2) { - -/* - Set up workspaces for eigenvalues only/accumulate new vectors - routine -*/ - - temp = log((doublereal) (*n)) / log(2.); - lgn = (integer) temp; - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - iprmpt = indxq + *n + 1; - iperm = iprmpt + *n * lgn; - iqptr = iperm + *n * lgn; - igivpt = iqptr + *n + 2; - igivcl = igivpt + *n * lgn; - - igivnm = 1; - iq = igivnm + ((*n) << (1)) * lgn; -/* Computing 2nd power */ - i__1 = *n; - iwrem = iq + i__1 * i__1 + 1; - -/* Initialize pointers */ - - i__1 = subpbs; - for (i__ = 0; i__ <= i__1; ++i__) { - iwork[iprmpt + i__] = 1; - iwork[igivpt + i__] = 1; -/* L50: */ - } - iwork[iqptr] = 1; - } - -/* - Solve each submatrix eigenproblem at the bottom of the divide and - conquer tree. 
-*/ - - curr = 0; - i__1 = spm1; - for (i__ = 0; i__ <= i__1; ++i__) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[1]; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 1] - iwork[i__]; - } - if (*icompq == 2) { - dsteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + - submat * q_dim1], ldq, &work[1], info); - if (*info != 0) { - goto L130; - } - } else { - dsteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + - iwork[iqptr + curr]], &matsiz, &work[1], info); - if (*info != 0) { - goto L130; - } - if (*icompq == 1) { - dgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b2865, &q[submat * - q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]] - , &matsiz, &c_b2879, &qstore[submat * qstore_dim1 + 1] - , ldqs); - } -/* Computing 2nd power */ - i__2 = matsiz; - iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; - ++curr; - } - k = 1; - i__2 = iwork[i__ + 1]; - for (j = submat; j <= i__2; ++j) { - iwork[indxq + j] = k; - ++k; -/* L60: */ - } -/* L70: */ - } - -/* - Successively merge eigensystems of adjacent submatrices - into eigensystem for the corresponding larger matrix. - - while ( SUBPBS > 1 ) -*/ - - curlvl = 1; -L80: - if (subpbs > 1) { - spm2 = subpbs - 2; - i__1 = spm2; - for (i__ = 0; i__ <= i__1; i__ += 2) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[2]; - msd2 = iwork[1]; - curprb = 0; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 2] - iwork[i__]; - msd2 = matsiz / 2; - ++curprb; - } - -/* - Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) - into an eigensystem of size MATSIZ. - DLAED1 is used only for the full eigensystem of a tridiagonal - matrix. - DLAED7 handles the cases in which eigenvalues only or eigenvalues - and eigenvectors of a full symmetric matrix (which was reduced to - tridiagonal form) are desired. 
-*/ - - if (*icompq == 2) { - dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], - ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & - msd2, &work[1], &iwork[subpbs + 1], info); - } else { - dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ - submat], &qstore[submat * qstore_dim1 + 1], ldqs, & - iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & - work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] - , &iwork[igivpt], &iwork[igivcl], &work[igivnm], & - work[iwrem], &iwork[subpbs + 1], info); - } - if (*info != 0) { - goto L130; - } - iwork[i__ / 2 + 1] = iwork[i__ + 2]; -/* L90: */ - } - subpbs /= 2; - ++curlvl; - goto L80; - } - -/* - end while - - Re-merge the eigenvalues/vectors which were deflated at the final - merge step. -*/ - - if (*icompq == 1) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - work[i__] = d__[j]; - dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 - + 1], &c__1); -/* L100: */ - } - dcopy_(n, &work[1], &c__1, &d__[1], &c__1); - } else if (*icompq == 2) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - work[i__] = d__[j]; - dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); -/* L110: */ - } - dcopy_(n, &work[1], &c__1, &d__[1], &c__1); - dlacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - work[i__] = d__[j]; -/* L120: */ - } - dcopy_(n, &work[1], &c__1, &d__[1], &c__1); - } - goto L140; - -L130: - *info = submat * (*n + 1) + submat + matsiz - 1; - -L140: - return 0; - -/* End of DLAED0 */ - -} /* dlaed0_ */ - -/* Subroutine */ int dlaed1_(integer *n, doublereal *d__, doublereal *q, - integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, - doublereal *work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - - /* Local variables */ - static integer i__, k, n1, 
n2, is, iw, iz, iq2, zpp1, indx, indxc; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer indxp; - extern /* Subroutine */ int dlaed2_(integer *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *, integer *, integer *, integer *), dlaed3_(integer *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, doublereal *, doublereal *, integer *, integer *, - doublereal *, doublereal *, integer *); - static integer idlmda; - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *); - static integer coltyp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLAED1 computes the updated eigensystem of a diagonal - matrix after modification by a rank-one symmetric matrix. This - routine is used only for the eigenproblem which requires all - eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles - the case in which eigenvalues only or eigenvalues and eigenvectors - of a full symmetric matrix (which was reduced to tridiagonal form) - are desired. - - T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) - - where Z = Q'u, u is a vector of length N with ones in the - CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. - - The eigenvectors of the original matrix are stored in Q, and the - eigenvalues are in D. The algorithm consists of three stages: - - The first stage consists of deflating the size of the problem - when there are multiple eigenvalues or if there is a zero in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. 
This stage is - performed by the routine DLAED2. - - The second stage consists of calculating the updated - eigenvalues. This is done by finding the roots of the secular - equation via the routine DLAED4 (as called by DLAED3). - This routine also calculates the eigenvectors of the current - problem. - - The final stage consists of computing the updated eigenvectors - directly using the updated eigenvalues. The eigenvectors for - the current problem are multiplied with the eigenvectors from - the overall problem. - - Arguments - ========= - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the eigenvalues of the rank-1-perturbed matrix. - On exit, the eigenvalues of the repaired matrix. - - Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) - On entry, the eigenvectors of the rank-1-perturbed matrix. - On exit, the eigenvectors of the repaired tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - INDXQ (input/output) INTEGER array, dimension (N) - On entry, the permutation which separately sorts the two - subproblems in D into ascending order. - On exit, the permutation which will reintegrate the - subproblems back into sorted order, - i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. - - RHO (input) DOUBLE PRECISION - The subdiagonal entry used to create the rank-1 modification. - - CUTPNT (input) INTEGER - The location of the last eigenvalue in the leading sub-matrix. - min(1,N) <= CUTPNT <= N/2. - - WORK (workspace) DOUBLE PRECISION array, dimension (4*N + N**2) - - IWORK (workspace) INTEGER array, dimension (4*N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. 
- > 0: if INFO = 1, an eigenvalue did not converge - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -1; - } else if (*ldq < max(1,*n)) { - *info = -4; - } else /* if(complicated condition) */ { -/* Computing MIN */ - i__1 = 1, i__2 = *n / 2; - if ((min(i__1,i__2) > *cutpnt) || (*n / 2 < *cutpnt)) { - *info = -7; - } - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED1", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* - The following values are integer pointers which indicate - the portion of the workspace - used by a particular array in DLAED2 and DLAED3. -*/ - - iz = 1; - idlmda = iz + *n; - iw = idlmda + *n; - iq2 = iw + *n; - - indx = 1; - indxc = indx + *n; - coltyp = indxc + *n; - indxp = coltyp + *n; - - -/* - Form the z-vector which consists of the last row of Q_1 and the - first row of Q_2. -*/ - - dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); - zpp1 = *cutpnt + 1; - i__1 = *n - *cutpnt; - dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); - -/* Deflate eigenvalues. */ - - dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ - iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ - indxc], &iwork[indxp], &iwork[coltyp], info); - - if (*info != 0) { - goto L20; - } - -/* Solve Secular Equation. 
*/ - - if (k != 0) { - is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + - 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; - dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], - &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ - is], info); - if (*info != 0) { - goto L20; - } - -/* Prepare the INDXQ sorting permutation. */ - - n1 = k; - n2 = *n - k; - dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indxq[i__] = i__; -/* L10: */ - } - } - -L20: - return 0; - -/* End of DLAED1 */ - -} /* dlaed1_ */ - -/* Subroutine */ int dlaed2_(integer *k, integer *n, integer *n1, doublereal * - d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, - doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, - integer *indx, integer *indxc, integer *indxp, integer *coltyp, - integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - doublereal d__1, d__2, d__3, d__4; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal c__; - static integer i__, j; - static doublereal s, t; - static integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; - static doublereal eps, tau, tol; - static integer psm[4], imax, jmax; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer ctot[4]; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dcopy_(integer *, doublereal *, integer *, doublereal - *, integer *); - - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. 
of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLAED2 merges the two sets of eigenvalues together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - eigenvalues are close together or if there is a tiny entry in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - Arguments - ========= - - K (output) INTEGER - The number of non-deflated eigenvalues, and the order of the - related secular equation. 0 <= K <=N. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - N1 (input) INTEGER - The location of the last eigenvalue in the leading sub-matrix. - min(1,N) <= N1 <= N/2. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, D contains the eigenvalues of the two submatrices to - be combined. - On exit, D contains the trailing (N-K) updated eigenvalues - (those which were deflated) sorted into increasing order. - - Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) - On entry, Q contains the eigenvectors of two submatrices in - the two square blocks with corners at (1,1), (N1,N1) - and (N1+1, N1+1), (N,N). - On exit, Q contains the trailing (N-K) updated eigenvectors - (those which were deflated) in its last N-K columns. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - INDXQ (input/output) INTEGER array, dimension (N) - The permutation which separately sorts the two sub-problems - in D into ascending order. Note that elements in the second - half of this permutation must first have N1 added to their - values. Destroyed on exit. 
- - RHO (input/output) DOUBLE PRECISION - On entry, the off-diagonal element associated with the rank-1 - cut which originally split the two submatrices which are now - being recombined. - On exit, RHO has been modified to the value required by - DLAED3. - - Z (input) DOUBLE PRECISION array, dimension (N) - On entry, Z contains the updating vector (the last - row of the first sub-eigenvector matrix and the first row of - the second sub-eigenvector matrix). - On exit, the contents of Z have been destroyed by the updating - process. - - DLAMDA (output) DOUBLE PRECISION array, dimension (N) - A copy of the first K eigenvalues which will be used by - DLAED3 to form the secular equation. - - W (output) DOUBLE PRECISION array, dimension (N) - The first k values of the final deflation-altered z-vector - which will be passed to DLAED3. - - Q2 (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) - A copy of the first K eigenvectors which will be used by - DLAED3 in a matrix multiply (DGEMM) to solve for the new - eigenvectors. - - INDX (workspace) INTEGER array, dimension (N) - The permutation used to sort the contents of DLAMDA into - ascending order. - - INDXC (output) INTEGER array, dimension (N) - The permutation used to arrange the columns of the deflated - Q matrix into three groups: the first group contains non-zero - elements only at and above N1, the second contains - non-zero elements only below N1, and the third is dense. - - INDXP (workspace) INTEGER array, dimension (N) - The permutation used to place deflated values of D at the end - of the array. INDXP(1:K) points to the nondeflated D-values - and INDXP(K+1:N) points to the deflated eigenvalues. - - COLTYP (workspace/output) INTEGER array, dimension (N) - During execution, a label which will indicate which of the - following types a column in the Q2 matrix is: - 1 : non-zero in the upper half only; - 2 : dense; - 3 : non-zero in the lower half only; - 4 : deflated. 
- On exit, COLTYP(i) is the number of columns of type i, - for i=1 to 4 only. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --z__; - --dlamda; - --w; - --q2; - --indx; - --indxc; - --indxp; - --coltyp; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -2; - } else if (*ldq < max(1,*n)) { - *info = -6; - } else /* if(complicated condition) */ { -/* Computing MIN */ - i__1 = 1, i__2 = *n / 2; - if ((min(i__1,i__2) > *n1) || (*n / 2 < *n1)) { - *info = -3; - } - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - n2 = *n - *n1; - n1p1 = *n1 + 1; - - if (*rho < 0.) { - dscal_(&n2, &c_b3001, &z__[n1p1], &c__1); - } - -/* - Normalize z so that norm(z) = 1. Since z is the concatenation of - two normalized vectors, norm2(z) = sqrt(2). -*/ - - t = 1. 
/ sqrt(2.); - dscal_(n, &t, &z__[1], &c__1); - -/* RHO = ABS( norm(z)**2 * RHO ) */ - - *rho = (d__1 = *rho * 2., abs(d__1)); - -/* Sort the eigenvalues into increasing order */ - - i__1 = *n; - for (i__ = n1p1; i__ <= i__1; ++i__) { - indxq[i__] += *n1; -/* L10: */ - } - -/* re-integrate the deflated parts from the last pass */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = d__[indxq[i__]]; -/* L20: */ - } - dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indx[i__] = indxq[indxc[i__]]; -/* L30: */ - } - -/* Calculate the allowable deflation tolerance */ - - imax = idamax_(n, &z__[1], &c__1); - jmax = idamax_(n, &d__[1], &c__1); - eps = EPSILON; -/* Computing MAX */ - d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2)) - ; - tol = eps * 8. * max(d__3,d__4); - -/* - If the rank-1 modifier is small enough, no more needs to be done - except to reorganize Q so that its columns correspond with the - elements in D. -*/ - - if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { - *k = 0; - iq2 = 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__ = indx[j]; - dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); - dlamda[j] = d__[i__]; - iq2 += *n; -/* L40: */ - } - dlacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); - dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); - goto L190; - } - -/* - If there are multiple eigenvalues then the problem deflates. Here - the number of equal eigenvalues are found. As each equal - eigenvalue is found, an elementary reflector is computed to rotate - the corresponding eigensubspace so that the corresponding - components of Z are zero in this new basis. 
-*/ - - i__1 = *n1; - for (i__ = 1; i__ <= i__1; ++i__) { - coltyp[i__] = 1; -/* L50: */ - } - i__1 = *n; - for (i__ = n1p1; i__ <= i__1; ++i__) { - coltyp[i__] = 3; -/* L60: */ - } - - - *k = 0; - k2 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - nj = indx[j]; - if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - coltyp[nj] = 4; - indxp[k2] = nj; - if (j == *n) { - goto L100; - } - } else { - pj = nj; - goto L80; - } -/* L70: */ - } -L80: - ++j; - nj = indx[j]; - if (j > *n) { - goto L100; - } - if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - coltyp[nj] = 4; - indxp[k2] = nj; - } else { - -/* Check if eigenvalues are close enough to allow deflation. */ - - s = z__[pj]; - c__ = z__[nj]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = dlapy2_(&c__, &s); - t = d__[nj] - d__[pj]; - c__ /= tau; - s = -s / tau; - if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { - -/* Deflation is possible. */ - - z__[nj] = tau; - z__[pj] = 0.; - if (coltyp[nj] != coltyp[pj]) { - coltyp[nj] = 2; - } - coltyp[pj] = 4; - drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & - c__, &s); -/* Computing 2nd power */ - d__1 = c__; -/* Computing 2nd power */ - d__2 = s; - t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); -/* Computing 2nd power */ - d__1 = s; -/* Computing 2nd power */ - d__2 = c__; - d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); - d__[pj] = t; - --k2; - i__ = 1; -L90: - if (k2 + i__ <= *n) { - if (d__[pj] < d__[indxp[k2 + i__]]) { - indxp[k2 + i__ - 1] = indxp[k2 + i__]; - indxp[k2 + i__] = pj; - ++i__; - goto L90; - } else { - indxp[k2 + i__ - 1] = pj; - } - } else { - indxp[k2 + i__ - 1] = pj; - } - pj = nj; - } else { - ++(*k); - dlamda[*k] = d__[pj]; - w[*k] = z__[pj]; - indxp[*k] = pj; - pj = nj; - } - } - goto L80; -L100: - -/* Record the last eigenvalue. 
*/ - - ++(*k); - dlamda[*k] = d__[pj]; - w[*k] = z__[pj]; - indxp[*k] = pj; - -/* - Count up the total number of the various types of columns, then - form a permutation which positions the four column types into - four uniform groups (although one or more of these groups may be - empty). -*/ - - for (j = 1; j <= 4; ++j) { - ctot[j - 1] = 0; -/* L110: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - ct = coltyp[j]; - ++ctot[ct - 1]; -/* L120: */ - } - -/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ - - psm[0] = 1; - psm[1] = ctot[0] + 1; - psm[2] = psm[1] + ctot[1]; - psm[3] = psm[2] + ctot[2]; - *k = *n - ctot[3]; - -/* - Fill out the INDXC array so that the permutation which it induces - will place all type-1 columns first, all type-2 columns next, - then all type-3's, and finally all type-4's. -*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - js = indxp[j]; - ct = coltyp[js]; - indx[psm[ct - 1]] = js; - indxc[psm[ct - 1]] = j; - ++psm[ct - 1]; -/* L130: */ - } - -/* - Sort the eigenvalues and corresponding eigenvectors into DLAMDA - and Q2 respectively. The eigenvalues/vectors which were not - deflated go into the first K slots of DLAMDA and Q2 respectively, - while those which were deflated go into the last N - K slots. 
-*/ - - i__ = 1; - iq1 = 1; - iq2 = (ctot[0] + ctot[1]) * *n1 + 1; - i__1 = ctot[0]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); - z__[i__] = d__[js]; - ++i__; - iq1 += *n1; -/* L140: */ - } - - i__1 = ctot[1]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); - dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); - z__[i__] = d__[js]; - ++i__; - iq1 += *n1; - iq2 += n2; -/* L150: */ - } - - i__1 = ctot[2]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); - z__[i__] = d__[js]; - ++i__; - iq2 += n2; -/* L160: */ - } - - iq1 = iq2; - i__1 = ctot[3]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); - iq2 += *n; - z__[i__] = d__[js]; - ++i__; -/* L170: */ - } - -/* - The deflated eigenvalues and their corresponding vectors go back - into the last N - K slots of D and Q respectively. -*/ - - dlacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); - i__1 = *n - *k; - dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); - -/* Copy CTOT into COLTYP for referencing in DLAED3. 
*/ - - for (j = 1; j <= 4; ++j) { - coltyp[j] = ctot[j - 1]; -/* L180: */ - } - -L190: - return 0; - -/* End of DLAED2 */ - -} /* dlaed2_ */ - -/* Subroutine */ int dlaed3_(integer *k, integer *n, integer *n1, doublereal * - d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda, - doublereal *q2, integer *indx, integer *ctot, doublereal *w, - doublereal *s, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer i__, j, n2, n12, ii, n23, iq2; - static doublereal temp; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *), - dcopy_(integer *, doublereal *, integer *, doublereal *, integer - *), dlaed4_(integer *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, integer *); - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *), - dlaset_(char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - DLAED3 finds the roots of the secular equation, as defined by the - values in D, W, and RHO, between 1 and K. It makes the - appropriate calls to DLAED4 and then updates the eigenvectors by - multiplying the matrix of eigenvectors of the pair of eigensystems - being combined by the matrix of eigenvectors of the K-by-K system - which is solved here. 
- - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - K (input) INTEGER - The number of terms in the rational function to be solved by - DLAED4. K >= 0. - - N (input) INTEGER - The number of rows and columns in the Q matrix. - N >= K (deflation may result in N>K). - - N1 (input) INTEGER - The location of the last eigenvalue in the leading submatrix. - min(1,N) <= N1 <= N/2. - - D (output) DOUBLE PRECISION array, dimension (N) - D(I) contains the updated eigenvalues for - 1 <= I <= K. - - Q (output) DOUBLE PRECISION array, dimension (LDQ,N) - Initially the first K columns are used as workspace. - On output the columns 1 to K contain - the updated eigenvectors. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - RHO (input) DOUBLE PRECISION - The value of the parameter in the rank one update equation. - RHO >= 0 required. - - DLAMDA (input/output) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. May be changed on output by - having lowest order bit set to zero on Cray X-MP, Cray Y-MP, - Cray-2, or Cray C-90, as described above. - - Q2 (input) DOUBLE PRECISION array, dimension (LDQ2, N) - The first K columns of this matrix contain the non-deflated - eigenvectors for the split problem. - - INDX (input) INTEGER array, dimension (N) - The permutation used to arrange the columns of the deflated - Q matrix into three groups (see DLAED2). - The rows of the eigenvectors found by DLAED4 must be likewise - permuted before the matrix multiply can take place. 
- - CTOT (input) INTEGER array, dimension (4) - A count of the total number of the various types of columns - in Q, as described in INDX. The fourth column type is any - column which has been deflated. - - W (input/output) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the components - of the deflation-adjusted updating vector. Destroyed on - output. - - S (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K - Will contain the eigenvectors of the repaired matrix which - will be multiplied by the previously accumulated eigenvectors - to update the system. - - LDS (input) INTEGER - The leading dimension of S. LDS >= max(1,K). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --dlamda; - --q2; - --indx; - --ctot; - --w; - --s; - - /* Function Body */ - *info = 0; - - if (*k < 0) { - *info = -1; - } else if (*n < *k) { - *info = -2; - } else if (*ldq < max(1,*n)) { - *info = -6; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED3", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*k == 0) { - return 0; - } - -/* - Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can - be computed with high relative accuracy (barring over/underflow). - This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). 
- The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), - which on any of these machines zeros out the bottommost - bit of DLAMDA(I) if it is 1; this makes the subsequent - subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DLAMDA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DLAMDA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. -*/ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; -/* L10: */ - } - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], - info); - -/* If the zero finder fails, the computation is terminated. */ - - if (*info != 0) { - goto L120; - } -/* L20: */ - } - - if (*k == 1) { - goto L110; - } - if (*k == 2) { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - w[1] = q[j * q_dim1 + 1]; - w[2] = q[j * q_dim1 + 2]; - ii = indx[1]; - q[j * q_dim1 + 1] = w[ii]; - ii = indx[2]; - q[j * q_dim1 + 2] = w[ii]; -/* L30: */ - } - goto L110; - } - -/* Compute updated W. 
*/ - - dcopy_(k, &w[1], &c__1, &s[1], &c__1); - -/* Initialize W(I) = Q(I,I) */ - - i__1 = *ldq + 1; - dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L40: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L50: */ - } -/* L60: */ - } - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - d__1 = sqrt(-w[i__]); - w[i__] = d_sign(&d__1, &s[i__]); -/* L70: */ - } - -/* Compute eigenvectors of the modified rank-1 modification. */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - s[i__] = w[i__] / q[i__ + j * q_dim1]; -/* L80: */ - } - temp = dnrm2_(k, &s[1], &c__1); - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - ii = indx[i__]; - q[i__ + j * q_dim1] = s[ii] / temp; -/* L90: */ - } -/* L100: */ - } - -/* Compute the updated eigenvectors. 
*/ - -L110: - - n2 = *n - *n1; - n12 = ctot[1] + ctot[2]; - n23 = ctot[2] + ctot[3]; - - dlacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); - iq2 = *n1 * n12 + 1; - if (n23 != 0) { - dgemm_("N", "N", &n2, k, &n23, &c_b2865, &q2[iq2], &n2, &s[1], &n23, & - c_b2879, &q[*n1 + 1 + q_dim1], ldq); - } else { - dlaset_("A", &n2, k, &c_b2879, &c_b2879, &q[*n1 + 1 + q_dim1], ldq); - } - - dlacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); - if (n12 != 0) { - dgemm_("N", "N", n1, k, &n12, &c_b2865, &q2[1], n1, &s[1], &n12, & - c_b2879, &q[q_offset], ldq); - } else { - dlaset_("A", n1, k, &c_b2879, &c_b2879, &q[q_dim1 + 1], ldq); - } - - -L120: - return 0; - -/* End of DLAED3 */ - -} /* dlaed3_ */ - -/* Subroutine */ int dlaed4_(integer *n, integer *i__, doublereal *d__, - doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, - integer *info) -{ - /* System generated locals */ - integer i__1; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal a, b, c__; - static integer j; - static doublereal w; - static integer ii; - static doublereal dw, zz[3]; - static integer ip1; - static doublereal del, eta, phi, eps, tau, psi; - static integer iim1, iip1; - static doublereal dphi, dpsi; - static integer iter; - static doublereal temp, prew, temp1, dltlb, dltub, midpt; - static integer niter; - static logical swtch; - extern /* Subroutine */ int dlaed5_(integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *), dlaed6_(integer *, - logical *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *); - static logical swtch3; - - static logical orgati; - static doublereal erretm, rhoinv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. 
of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - December 23, 1999 - - - Purpose - ======= - - This subroutine computes the I-th updated eigenvalue of a symmetric - rank-one modification to a diagonal matrix whose elements are - given in the array d, and that - - D(i) < D(j) for i < j - - and that RHO > 0. This is arranged by the calling routine, and is - no loss in generality. The rank-one modified system is thus - - diag( D ) + RHO * Z * Z_transpose. - - where we assume the Euclidean norm of Z is 1. - - The method consists of approximating the rational functions in the - secular equation by simpler interpolating rational functions. - - Arguments - ========= - - N (input) INTEGER - The length of all arrays. - - I (input) INTEGER - The index of the eigenvalue to be computed. 1 <= I <= N. - - D (input) DOUBLE PRECISION array, dimension (N) - The original eigenvalues. It is assumed that they are in - order, D(I) < D(J) for I < J. - - Z (input) DOUBLE PRECISION array, dimension (N) - The components of the updating vector. - - DELTA (output) DOUBLE PRECISION array, dimension (N) - If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th - component. If N = 1, then DELTA(1) = 1. The vector DELTA - contains the information necessary to construct the - eigenvectors. - - RHO (input) DOUBLE PRECISION - The scalar in the symmetric updating formula. - - DLAM (output) DOUBLE PRECISION - The computed lambda_I, the I-th updated eigenvalue. - - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = 1, the updating process failed. - - Internal Parameters - =================== - - Logical variable ORGATI (origin-at-i?) is used for distinguishing - whether D(i) or D(i+1) is treated as the origin. - - ORGATI = .true. origin at i - ORGATI = .false. origin at i+1 - - Logical variable SWTCH3 (switch-for-3-poles?) is for noting - if we are working with THREE poles! 
- - MAXIT is the maximum number of iterations allowed for each - eigenvalue. - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Since this routine is called in an inner loop, we do no argument - checking. - - Quick return for N=1 and 2. -*/ - - /* Parameter adjustments */ - --delta; - --z__; - --d__; - - /* Function Body */ - *info = 0; - if (*n == 1) { - -/* Presumably, I=1 upon entry */ - - *dlam = d__[1] + *rho * z__[1] * z__[1]; - delta[1] = 1.; - return 0; - } - if (*n == 2) { - dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); - return 0; - } - -/* Compute machine epsilon */ - - eps = EPSILON; - rhoinv = 1. / *rho; - -/* The case I = N */ - - if (*i__ == *n) { - -/* Initialize some basic variables */ - - ii = *n - 1; - niter = 1; - -/* Calculate initial guess */ - - midpt = *rho / 2.; - -/* - If ||Z||_2 is not one, then TEMP should be set to - RHO * ||Z||_2^2 / TWO -*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - midpt; -/* L10: */ - } - - psi = 0.; - i__1 = *n - 2; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / delta[j]; -/* L20: */ - } - - c__ = rhoinv + psi; - w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* - n]; - - if (w <= 0.) { - temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) - + z__[*n] * z__[*n] / *rho; - if (c__ <= temp) { - tau = *rho; - } else { - del = d__[*n] - d__[*n - 1]; - a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] - ; - b = z__[*n] * z__[*n] * del; - if (a < 0.) { - tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4. 
* c__)) / (c__ * 2.); - } - } - -/* - It can be proved that - D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO -*/ - - dltlb = midpt; - dltub = *rho; - } else { - del = d__[*n] - d__[*n - 1]; - a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; - b = z__[*n] * z__[*n] * del; - if (a < 0.) { - tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); - } - -/* - It can be proved that - D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 -*/ - - dltlb = 0.; - dltub = midpt; - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - tau; -/* L30: */ - } - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L40: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / delta[*n]; - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi - + dphi); - - w = rhoinv + phi + psi; - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - *dlam = d__[*i__] + tau; - goto L250; - } - - if (w <= 0.) { - dltlb = max(dltlb,tau); - } else { - dltub = min(dltub,tau); - } - -/* Calculate the new step */ - - ++niter; - c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; - a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( - dpsi + dphi); - b = delta[*n - 1] * delta[*n] * w; - if (c__ < 0.) { - c__ = abs(c__); - } - if (c__ == 0.) { -/* - ETA = B/A - ETA = RHO - TAU -*/ - eta = dltub - tau; - } else if (a >= 0.) { - eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ - * 2.); - } else { - eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) - ); - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. 
However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta > 0.) { - eta = -w / (dpsi + dphi); - } - temp = tau + eta; - if ((temp > dltub) || (temp < dltlb)) { - if (w < 0.) { - eta = (dltub - tau) / 2.; - } else { - eta = (dltlb - tau) / 2.; - } - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L50: */ - } - - tau += eta; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L60: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / delta[*n]; - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi - + dphi); - - w = rhoinv + phi + psi; - -/* Main loop to update the values of the array DELTA */ - - iter = niter + 1; - - for (niter = iter; niter <= 30; ++niter) { - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - *dlam = d__[*i__] + tau; - goto L250; - } - - if (w <= 0.) { - dltlb = max(dltlb,tau); - } else { - dltub = min(dltub,tau); - } - -/* Calculate the new step */ - - c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; - a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * - (dpsi + dphi); - b = delta[*n - 1] * delta[*n] * w; - if (a >= 0.) { - eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } else { - eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta > 0.) 
{ - eta = -w / (dpsi + dphi); - } - temp = tau + eta; - if ((temp > dltub) || (temp < dltlb)) { - if (w < 0.) { - eta = (dltub - tau) / 2.; - } else { - eta = (dltlb - tau) / 2.; - } - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L70: */ - } - - tau += eta; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L80: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / delta[*n]; - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( - dpsi + dphi); - - w = rhoinv + phi + psi; -/* L90: */ - } - -/* Return with INFO = 1, NITER = MAXIT and not converged */ - - *info = 1; - *dlam = d__[*i__] + tau; - goto L250; - -/* End for the case I = N */ - - } else { - -/* The case for I < N */ - - niter = 1; - ip1 = *i__ + 1; - -/* Calculate initial guess */ - - del = d__[ip1] - d__[*i__]; - midpt = del / 2.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - midpt; -/* L100: */ - } - - psi = 0.; - i__1 = *i__ - 1; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / delta[j]; -/* L110: */ - } - - phi = 0.; - i__1 = *i__ + 2; - for (j = *n; j >= i__1; --j) { - phi += z__[j] * z__[j] / delta[j]; -/* L120: */ - } - c__ = rhoinv + psi + phi; - w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / - delta[ip1]; - - if (w > 0.) { - -/* - d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 - - We choose d(i) as origin. -*/ - - orgati = TRUE_; - a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; - b = z__[*i__] * z__[*i__] * del; - if (a > 0.) { - tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); - } else { - tau = (a - sqrt((d__1 = a * a - b * 4. 
* c__, abs(d__1)))) / ( - c__ * 2.); - } - dltlb = 0.; - dltub = midpt; - } else { - -/* - (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) - - We choose d(i+1) as origin. -*/ - - orgati = FALSE_; - a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; - b = z__[ip1] * z__[ip1] * del; - if (a < 0.) { - tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( - d__1)))); - } else { - tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / - (c__ * 2.); - } - dltlb = -midpt; - dltub = 0.; - } - - if (orgati) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - tau; -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[ip1] - tau; -/* L140: */ - } - } - if (orgati) { - ii = *i__; - } else { - ii = *i__ + 1; - } - iim1 = ii - 1; - iip1 = ii + 1; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L150: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / delta[j]; - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L160: */ - } - - w = rhoinv + phi + psi; - -/* - W is the value of the secular function with - its ii-th element removed. -*/ - - swtch3 = FALSE_; - if (orgati) { - if (w < 0.) { - swtch3 = TRUE_; - } - } else { - if (w > 0.) { - swtch3 = TRUE_; - } - } - if ((ii == 1) || (ii == *n)) { - swtch3 = FALSE_; - } - - temp = z__[ii] / delta[ii]; - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w += temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. 
+ - abs(tau) * dw; - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - if (orgati) { - *dlam = d__[*i__] + tau; - } else { - *dlam = d__[ip1] + tau; - } - goto L250; - } - - if (w <= 0.) { - dltlb = max(dltlb,tau); - } else { - dltub = min(dltub,tau); - } - -/* Calculate the new step */ - - ++niter; - if (! swtch3) { - if (orgati) { -/* Computing 2nd power */ - d__1 = z__[*i__] / delta[*i__]; - c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * - d__1); - } else { -/* Computing 2nd power */ - d__1 = z__[ip1] / delta[ip1]; - c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * - d__1); - } - a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * - dw; - b = delta[*i__] * delta[ip1] * w; - if (c__ == 0.) { - if (a == 0.) { - if (orgati) { - a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * - (dpsi + dphi); - } else { - a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * - (dpsi + dphi); - } - } - eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - - temp = rhoinv + psi + phi; - if (orgati) { - temp1 = z__[iim1] / delta[iim1]; - temp1 *= temp1; - c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ - iip1]) * temp1; - zz[0] = z__[iim1] * z__[iim1]; - zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); - } else { - temp1 = z__[iip1] / delta[iip1]; - temp1 *= temp1; - c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[ - iim1]) * temp1; - zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); - zz[2] = z__[iip1] * z__[iip1]; - } - zz[1] = z__[ii] * z__[ii]; - dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); - if (*info != 0) { - goto L250; - } - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. 
However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.) { - eta = -w / dw; - } - temp = tau + eta; - if ((temp > dltub) || (temp < dltlb)) { - if (w < 0.) { - eta = (dltub - tau) / 2.; - } else { - eta = (dltlb - tau) / 2.; - } - } - - prew = w; - -/* L170: */ - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L180: */ - } - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L190: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / delta[j]; - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L200: */ - } - - temp = z__[ii] / delta[ii]; - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + ( - d__1 = tau + eta, abs(d__1)) * dw; - - swtch = FALSE_; - if (orgati) { - if (-w > abs(prew) / 10.) { - swtch = TRUE_; - } - } else { - if (w > abs(prew) / 10.) { - swtch = TRUE_; - } - } - - tau += eta; - -/* Main loop to update the values of the array DELTA */ - - iter = niter + 1; - - for (niter = iter; niter <= 30; ++niter) { - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - if (orgati) { - *dlam = d__[*i__] + tau; - } else { - *dlam = d__[ip1] + tau; - } - goto L250; - } - - if (w <= 0.) { - dltlb = max(dltlb,tau); - } else { - dltub = min(dltub,tau); - } - -/* Calculate the new step */ - - if (! swtch3) { - if (! 
swtch) { - if (orgati) { -/* Computing 2nd power */ - d__1 = z__[*i__] / delta[*i__]; - c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( - d__1 * d__1); - } else { -/* Computing 2nd power */ - d__1 = z__[ip1] / delta[ip1]; - c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * - (d__1 * d__1); - } - } else { - temp = z__[ii] / delta[ii]; - if (orgati) { - dpsi += temp * temp; - } else { - dphi += temp * temp; - } - c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; - } - a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] - * dw; - b = delta[*i__] * delta[ip1] * w; - if (c__ == 0.) { - if (a == 0.) { - if (! swtch) { - if (orgati) { - a = z__[*i__] * z__[*i__] + delta[ip1] * - delta[ip1] * (dpsi + dphi); - } else { - a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ - *i__] * (dpsi + dphi); - } - } else { - a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] - * delta[ip1] * dphi; - } - } - eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) - / (c__ * 2.); - } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. 
* c__, - abs(d__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - - temp = rhoinv + psi + phi; - if (swtch) { - c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; - zz[0] = delta[iim1] * delta[iim1] * dpsi; - zz[2] = delta[iip1] * delta[iip1] * dphi; - } else { - if (orgati) { - temp1 = z__[iim1] / delta[iim1]; - temp1 *= temp1; - c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - - d__[iip1]) * temp1; - zz[0] = z__[iim1] * z__[iim1]; - zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + - dphi); - } else { - temp1 = z__[iip1] / delta[iip1]; - temp1 *= temp1; - c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - - d__[iim1]) * temp1; - zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - - temp1)); - zz[2] = z__[iip1] * z__[iip1]; - } - } - dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, - info); - if (*info != 0) { - goto L250; - } - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.) { - eta = -w / dw; - } - temp = tau + eta; - if ((temp > dltub) || (temp < dltlb)) { - if (w < 0.) 
{ - eta = (dltub - tau) / 2.; - } else { - eta = (dltlb - tau) / 2.; - } - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L210: */ - } - - tau += eta; - prew = w; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L220: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / delta[j]; - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L230: */ - } - - temp = z__[ii] / delta[ii]; - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. - + abs(tau) * dw; - if (w * prew > 0. && abs(w) > abs(prew) / 10.) { - swtch = ! swtch; - } - -/* L240: */ - } - -/* Return with INFO = 1, NITER = MAXIT and not converged */ - - *info = 1; - if (orgati) { - *dlam = d__[*i__] + tau; - } else { - *dlam = d__[ip1] + tau; - } - - } - -L250: - - return 0; - -/* End of DLAED4 */ - -} /* dlaed4_ */ - -/* Subroutine */ int dlaed5_(integer *i__, doublereal *d__, doublereal *z__, - doublereal *delta, doublereal *rho, doublereal *dlam) -{ - /* System generated locals */ - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal b, c__, w, del, tau, temp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 - - - Purpose - ======= - - This subroutine computes the I-th eigenvalue of a symmetric rank-one - modification of a 2-by-2 diagonal matrix - - diag( D ) + RHO * Z * transpose(Z) . - - The diagonal elements in the array D are assumed to satisfy - - D(i) < D(j) for i < j . 
- - We also assume RHO > 0 and that the Euclidean norm of the vector - Z is one. - - Arguments - ========= - - I (input) INTEGER - The index of the eigenvalue to be computed. I = 1 or I = 2. - - D (input) DOUBLE PRECISION array, dimension (2) - The original eigenvalues. We assume D(1) < D(2). - - Z (input) DOUBLE PRECISION array, dimension (2) - The components of the updating vector. - - DELTA (output) DOUBLE PRECISION array, dimension (2) - The vector DELTA contains the information necessary - to construct the eigenvectors. - - RHO (input) DOUBLE PRECISION - The scalar in the symmetric updating formula. - - DLAM (output) DOUBLE PRECISION - The computed lambda_I, the I-th updated eigenvalue. - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --delta; - --z__; - --d__; - - /* Function Body */ - del = d__[2] - d__[1]; - if (*i__ == 1) { - w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.; - if (w > 0.) { - b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[1] * z__[1] * del; - -/* B > ZERO, always */ - - tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); - *dlam = d__[1] + tau; - delta[1] = -z__[1] / tau; - delta[2] = z__[2] / (del - tau); - } else { - b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * del; - if (b > 0.) { - tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); - } else { - tau = (b - sqrt(b * b + c__ * 4.)) / 2.; - } - *dlam = d__[2] + tau; - delta[1] = -z__[1] / (del + tau); - delta[2] = -z__[2] / tau; - } - temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); - delta[1] /= temp; - delta[2] /= temp; - } else { - -/* Now I=2 */ - - b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * del; - if (b > 0.) 
{ - tau = (b + sqrt(b * b + c__ * 4.)) / 2.; - } else { - tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); - } - *dlam = d__[2] + tau; - delta[1] = -z__[1] / (del + tau); - delta[2] = -z__[2] / tau; - temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); - delta[1] /= temp; - delta[2] /= temp; - } - return 0; - -/* End OF DLAED5 */ - -} /* dlaed5_ */ - -/* Subroutine */ int dlaed6_(integer *kniter, logical *orgati, doublereal * - rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * - tau, integer *info) -{ - /* Initialized data */ - - static logical first = TRUE_; - - /* System generated locals */ - integer i__1; - doublereal d__1, d__2, d__3, d__4; - - /* Builtin functions */ - double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *); - - /* Local variables */ - static doublereal a, b, c__, f; - static integer i__; - static doublereal fc, df, ddf, eta, eps, base; - static integer iter; - static doublereal temp, temp1, temp2, temp3, temp4; - static logical scale; - static integer niter; - static doublereal small1, small2, sminv1, sminv2; - - static doublereal dscale[3], sclfac, zscale[3], erretm, sclinv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - DLAED6 computes the positive or negative root (closest to the origin) - of - z(1) z(2) z(3) - f(x) = rho + --------- + ---------- + --------- - d(1)-x d(2)-x d(3)-x - - It is assumed that - - if ORGATI = .true. the root is between d(2) and d(3); - otherwise it is between d(1) and d(2) - - This routine will be called by DLAED4 when necessary. In most cases, - the root sought is the smallest in magnitude, though it might not be - in some extremely rare situations. - - Arguments - ========= - - KNITER (input) INTEGER - Refer to DLAED4 for its significance. 
- - ORGATI (input) LOGICAL - If ORGATI is true, the needed root is between d(2) and - d(3); otherwise it is between d(1) and d(2). See - DLAED4 for further details. - - RHO (input) DOUBLE PRECISION - Refer to the equation f(x) above. - - D (input) DOUBLE PRECISION array, dimension (3) - D satisfies d(1) < d(2) < d(3). - - Z (input) DOUBLE PRECISION array, dimension (3) - Each of the elements in z must be positive. - - FINIT (input) DOUBLE PRECISION - The value of f at 0. It is more accurate than the one - evaluated inside this routine (if someone wants to do - so). - - TAU (output) DOUBLE PRECISION - The root of the equation f(x). - - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = 1, failure to converge - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== -*/ - - /* Parameter adjustments */ - --z__; - --d__; - - /* Function Body */ - - *info = 0; - - niter = 1; - *tau = 0.; - if (*kniter == 2) { - if (*orgati) { - temp = (d__[3] - d__[2]) / 2.; - c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); - a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; - b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; - } else { - temp = (d__[1] - d__[2]) / 2.; - c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); - a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; - b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; - } -/* Computing MAX */ - d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); - temp = max(d__1,d__2); - a /= temp; - b /= temp; - c__ /= temp; - if (c__ == 0.) { - *tau = b / a; - } else if (a <= 0.) { - *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } else { - *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. 
* c__, abs(d__1)) - )); - } - temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) + - z__[3] / (d__[3] - *tau); - if (abs(*finit) <= abs(temp)) { - *tau = 0.; - } - } - -/* - On first call to routine, get machine parameters for - possible scaling to avoid overflow -*/ - - if (first) { - eps = EPSILON; - base = BASE; - i__1 = (integer) (log(SAFEMINIMUM) / log(base) / 3.); - small1 = pow_di(&base, &i__1); - sminv1 = 1. / small1; - small2 = small1 * small1; - sminv2 = sminv1 * sminv1; - first = FALSE_; - } - -/* - Determine if scaling of inputs necessary to avoid overflow - when computing 1/TEMP**3 -*/ - - if (*orgati) { -/* Computing MIN */ - d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - * - tau, abs(d__2)); - temp = min(d__3,d__4); - } else { -/* Computing MIN */ - d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - * - tau, abs(d__2)); - temp = min(d__3,d__4); - } - scale = FALSE_; - if (temp <= small1) { - scale = TRUE_; - if (temp <= small2) { - -/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ - - sclfac = sminv2; - sclinv = small2; - } else { - -/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ - - sclfac = sminv1; - sclinv = small1; - } - -/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ - - for (i__ = 1; i__ <= 3; ++i__) { - dscale[i__ - 1] = d__[i__] * sclfac; - zscale[i__ - 1] = z__[i__] * sclfac; -/* L10: */ - } - *tau *= sclfac; - } else { - -/* Copy D and Z to DSCALE and ZSCALE */ - - for (i__ = 1; i__ <= 3; ++i__) { - dscale[i__ - 1] = d__[i__]; - zscale[i__ - 1] = z__[i__]; -/* L20: */ - } - } - - fc = 0.; - df = 0.; - ddf = 0.; - for (i__ = 1; i__ <= 3; ++i__) { - temp = 1. / (dscale[i__ - 1] - *tau); - temp1 = zscale[i__ - 1] * temp; - temp2 = temp1 * temp; - temp3 = temp2 * temp; - fc += temp1 / dscale[i__ - 1]; - df += temp2; - ddf += temp3; -/* L30: */ - } - f = *finit + *tau * fc; - - if (abs(f) <= 0.) 
{ - goto L60; - } - -/* - Iteration begins - - It is not hard to see that - - 1) Iterations will go up monotonically - if FINIT < 0; - - 2) Iterations will go down monotonically - if FINIT > 0. -*/ - - iter = niter + 1; - - for (niter = iter; niter <= 20; ++niter) { - - if (*orgati) { - temp1 = dscale[1] - *tau; - temp2 = dscale[2] - *tau; - } else { - temp1 = dscale[0] - *tau; - temp2 = dscale[1] - *tau; - } - a = (temp1 + temp2) * f - temp1 * temp2 * df; - b = temp1 * temp2 * f; - c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; -/* Computing MAX */ - d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); - temp = max(d__1,d__2); - a /= temp; - b /= temp; - c__ /= temp; - if (c__ == 0.) { - eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ - * 2.); - } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) - ); - } - if (f * eta >= 0.) { - eta = -f / df; - } - - temp = eta + *tau; - if (*orgati) { - if (eta > 0. && temp >= dscale[2]) { - eta = (dscale[2] - *tau) / 2.; - } - if (eta < 0. && temp <= dscale[1]) { - eta = (dscale[1] - *tau) / 2.; - } - } else { - if (eta > 0. && temp >= dscale[1]) { - eta = (dscale[1] - *tau) / 2.; - } - if (eta < 0. && temp <= dscale[0]) { - eta = (dscale[0] - *tau) / 2.; - } - } - *tau += eta; - - fc = 0.; - erretm = 0.; - df = 0.; - ddf = 0.; - for (i__ = 1; i__ <= 3; ++i__) { - temp = 1. / (dscale[i__ - 1] - *tau); - temp1 = zscale[i__ - 1] * temp; - temp2 = temp1 * temp; - temp3 = temp2 * temp; - temp4 = temp1 / dscale[i__ - 1]; - fc += temp4; - erretm += abs(temp4); - df += temp2; - ddf += temp3; -/* L40: */ - } - f = *finit + *tau * fc; - erretm = (abs(*finit) + abs(*tau) * erretm) * 8. 
+ abs(*tau) * df; - if (abs(f) <= eps * erretm) { - goto L60; - } -/* L50: */ - } - *info = 1; -L60: - -/* Undo scaling */ - - if (scale) { - *tau *= sclinv; - } - return 0; - -/* End of DLAED6 */ - -} /* dlaed6_ */ - -/* Subroutine */ int dlaed7_(integer *icompq, integer *n, integer *qsiz, - integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, - doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer - *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * - perm, integer *givptr, integer *givcol, doublereal *givnum, - doublereal *work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer indxc, indxp; - extern /* Subroutine */ int dlaed8_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *, integer *, - doublereal *, integer *, integer *, integer *), dlaed9_(integer *, - integer *, integer *, integer *, doublereal *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - integer *, integer *), dlaeda_(integer *, integer *, integer *, - integer *, integer *, integer *, integer *, integer *, doublereal - *, doublereal *, integer *, doublereal *, doublereal *, integer *) - ; - static integer idlmda; - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *); - static integer coltyp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. 
of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DLAED7 computes the updated eigensystem of a diagonal - matrix after modification by a rank-one symmetric matrix. This - routine is used only for the eigenproblem which requires all - eigenvalues and optionally eigenvectors of a dense symmetric matrix - that has been reduced to tridiagonal form. DLAED1 handles - the case in which all eigenvalues and eigenvectors of a symmetric - tridiagonal matrix are desired. - - T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) - - where Z = Q'u, u is a vector of length N with ones in the - CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. - - The eigenvectors of the original matrix are stored in Q, and the - eigenvalues are in D. The algorithm consists of three stages: - - The first stage consists of deflating the size of the problem - when there are multiple eigenvalues or if there is a zero in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine DLAED8. - - The second stage consists of calculating the updated - eigenvalues. This is done by finding the roots of the secular - equation via the routine DLAED4 (as called by DLAED9). - This routine also calculates the eigenvectors of the current - problem. - - The final stage consists of computing the updated eigenvectors - directly using the updated eigenvalues. The eigenvectors for - the current problem are multiplied with the eigenvectors from - the overall problem. - - Arguments - ========= - - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. 
- - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - - TLVLS (input) INTEGER - The total number of merging levels in the overall divide and - conquer tree. - - CURLVL (input) INTEGER - The current level in the overall merge routine, - 0 <= CURLVL <= TLVLS. - - CURPBM (input) INTEGER - The current problem in the current level in the overall - merge routine (counting from upper left to lower right). - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the eigenvalues of the rank-1-perturbed matrix. - On exit, the eigenvalues of the repaired matrix. - - Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) - On entry, the eigenvectors of the rank-1-perturbed matrix. - On exit, the eigenvectors of the repaired tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - INDXQ (output) INTEGER array, dimension (N) - The permutation which will reintegrate the subproblem just - solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) - will be in ascending order. - - RHO (input) DOUBLE PRECISION - The subdiagonal element used to create the rank-1 - modification. - - CUTPNT (input) INTEGER - Contains the location of the last eigenvalue in the leading - sub-matrix. min(1,N) <= CUTPNT <= N. - - QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) - Stores eigenvectors of submatrices encountered during - divide and conquer, packed together. QPTR points to - beginning of the submatrices. - - QPTR (input/output) INTEGER array, dimension (N+2) - List of indices pointing to beginning of submatrices stored - in QSTORE. The submatrices are numbered starting at the - bottom left of the divide and conquer tree, from left to - right and bottom to top. - - PRMPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in PERM a - level's permutation is stored. 
PRMPTR(i+1) - PRMPTR(i) - indicates the size of the permutation and also the size of - the full, non-deflated problem. - - PERM (input) INTEGER array, dimension (N lg N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in GIVCOL a - level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) - indicates the number of Givens rotations. - - GIVCOL (input) INTEGER array, dimension (2, N lg N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - WORK (workspace) DOUBLE PRECISION array, dimension (3*N+QSIZ*N) - - IWORK (workspace) INTEGER array, dimension (4*N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --qstore; - --qptr; - --prmptr; - --perm; - --givptr; - givcol -= 3; - givnum -= 3; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*icompq == 1 && *qsiz < *n) { - *info = -4; - } else if (*ldq < max(1,*n)) { - *info = -9; - } else if ((min(1,*n) > *cutpnt) || (*n < *cutpnt)) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED7", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in DLAED8 and DLAED9. -*/ - - if (*icompq == 1) { - ldq2 = *qsiz; - } else { - ldq2 = *n; - } - - iz = 1; - idlmda = iz + *n; - iw = idlmda + *n; - iq2 = iw + *n; - is = iq2 + *n * ldq2; - - indx = 1; - indxc = indx + *n; - coltyp = indxc + *n; - indxp = coltyp + *n; - -/* - Form the z-vector which consists of the last row of Q_1 and the - first row of Q_2. -*/ - - ptr = pow_ii(&c__2, tlvls) + 1; - i__1 = *curlvl - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *tlvls - i__; - ptr += pow_ii(&c__2, &i__2); -/* L10: */ - } - curr = ptr + *curpbm; - dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & - givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz - + *n], info); - -/* - When solving the final problem, we no longer need the stored data, - so we will overwrite the data from this level onto the previously - used storage space. -*/ - - if (*curlvl == *tlvls) { - qptr[curr] = 1; - prmptr[curr] = 1; - givptr[curr] = 1; - } - -/* Sort and Deflate eigenvalues. 
*/ - - dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, - cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & - perm[prmptr[curr]], &givptr[curr + 1], &givcol[((givptr[curr]) << - (1)) + 1], &givnum[((givptr[curr]) << (1)) + 1], &iwork[indxp], & - iwork[indx], info); - prmptr[curr + 1] = prmptr[curr] + *n; - givptr[curr + 1] += givptr[curr]; - -/* Solve Secular Equation. */ - - if (k != 0) { - dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], - &work[iw], &qstore[qptr[curr]], &k, info); - if (*info != 0) { - goto L30; - } - if (*icompq == 1) { - dgemm_("N", "N", qsiz, &k, &k, &c_b2865, &work[iq2], &ldq2, & - qstore[qptr[curr]], &k, &c_b2879, &q[q_offset], ldq); - } -/* Computing 2nd power */ - i__1 = k; - qptr[curr + 1] = qptr[curr] + i__1 * i__1; - -/* Prepare the INDXQ sorting permutation. */ - - n1 = k; - n2 = *n - k; - dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); - } else { - qptr[curr + 1] = qptr[curr]; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indxq[i__] = i__; -/* L20: */ - } - } - -L30: - return 0; - -/* End of DLAED7 */ - -} /* dlaed7_ */ - -/* Subroutine */ int dlaed8_(integer *icompq, integer *k, integer *n, integer - *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, - doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, - doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer - *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer - *indx, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal c__; - static integer i__, j; - static doublereal s, t; - static integer k2, n1, n2, jp, n1p1; - static doublereal eps, tau, tol; - static integer jlam, imax, jmax; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, 
integer *, doublereal *, doublereal *), dscal_( - integer *, doublereal *, doublereal *, integer *), dcopy_(integer - *, doublereal *, integer *, doublereal *, integer *); - - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 - - - Purpose - ======= - - DLAED8 merges the two sets of eigenvalues together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - eigenvalues are close together or if there is a tiny element in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - Arguments - ========= - - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - - K (output) INTEGER - The number of non-deflated eigenvalues, and the order of the - related secular equation. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the eigenvalues of the two submatrices to be - combined. On exit, the trailing (N-K) updated eigenvalues - (those which were deflated) sorted into increasing order. - - Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) - If ICOMPQ = 0, Q is not referenced. 
Otherwise, - on entry, Q contains the eigenvectors of the partially solved - system which has been previously updated in matrix - multiplies with other partially solved eigensystems. - On exit, Q contains the trailing (N-K) updated eigenvectors - (those which were deflated) in its last N-K columns. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - INDXQ (input) INTEGER array, dimension (N) - The permutation which separately sorts the two sub-problems - in D into ascending order. Note that elements in the second - half of this permutation must first have CUTPNT added to - their values in order to be accurate. - - RHO (input/output) DOUBLE PRECISION - On entry, the off-diagonal element associated with the rank-1 - cut which originally split the two submatrices which are now - being recombined. - On exit, RHO has been modified to the value required by - DLAED3. - - CUTPNT (input) INTEGER - The location of the last eigenvalue in the leading - sub-matrix. min(1,N) <= CUTPNT <= N. - - Z (input) DOUBLE PRECISION array, dimension (N) - On entry, Z contains the updating vector (the last row of - the first sub-eigenvector matrix and the first row of the - second sub-eigenvector matrix). - On exit, the contents of Z are destroyed by the updating - process. - - DLAMDA (output) DOUBLE PRECISION array, dimension (N) - A copy of the first K eigenvalues which will be used by - DLAED3 to form the secular equation. - - Q2 (output) DOUBLE PRECISION array, dimension (LDQ2,N) - If ICOMPQ = 0, Q2 is not referenced. Otherwise, - a copy of the first K eigenvectors which will be used by - DLAED7 in a matrix multiply (DGEMM) to update the new - eigenvectors. - - LDQ2 (input) INTEGER - The leading dimension of the array Q2. LDQ2 >= max(1,N). - - W (output) DOUBLE PRECISION array, dimension (N) - The first k values of the final deflation-altered z-vector and - will be passed to DLAED3. 
- - PERM (output) INTEGER array, dimension (N) - The permutations (from deflation and sorting) to be applied - to each eigenblock. - - GIVPTR (output) INTEGER - The number of Givens rotations which took place in this - subproblem. - - GIVCOL (output) INTEGER array, dimension (2, N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - INDXP (workspace) INTEGER array, dimension (N) - The permutation used to place deflated values of D at the end - of the array. INDXP(1:K) points to the nondeflated D-values - and INDXP(K+1:N) points to the deflated eigenvalues. - - INDX (workspace) INTEGER array, dimension (N) - The permutation used to sort the contents of D into ascending - order. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --z__; - --dlamda; - q2_dim1 = *ldq2; - q2_offset = 1 + q2_dim1; - q2 -= q2_offset; - --w; - --perm; - givcol -= 3; - givnum -= 3; - --indxp; - --indx; - - /* Function Body */ - *info = 0; - - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*n < 0) { - *info = -3; - } else if (*icompq == 1 && *qsiz < *n) { - *info = -4; - } else if (*ldq < max(1,*n)) { - *info = -7; - } else if ((*cutpnt < min(1,*n)) || (*cutpnt > *n)) { - *info = -10; - } else if (*ldq2 < max(1,*n)) { - *info = -14; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED8", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - n1 = *cutpnt; - n2 = *n - n1; - n1p1 = n1 + 1; - - if (*rho < 0.) { - dscal_(&n2, &c_b3001, &z__[n1p1], &c__1); - } - -/* Normalize z so that norm(z) = 1 */ - - t = 1. / sqrt(2.); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - indx[j] = j; -/* L10: */ - } - dscal_(n, &t, &z__[1], &c__1); - *rho = (d__1 = *rho * 2., abs(d__1)); - -/* Sort the eigenvalues into increasing order */ - - i__1 = *n; - for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { - indxq[i__] += *cutpnt; -/* L20: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = d__[indxq[i__]]; - w[i__] = z__[indxq[i__]]; -/* L30: */ - } - i__ = 1; - j = *cutpnt + 1; - dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = dlamda[indx[i__]]; - z__[i__] = w[indx[i__]]; -/* L40: */ - } - -/* Calculate the allowable deflation tolerence */ - - imax = idamax_(n, &z__[1], &c__1); - jmax = idamax_(n, &d__[1], &c__1); - eps = EPSILON; - tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); - -/* - If the rank-1 modifier is small enough, no more needs to be done - except to reorganize Q so that its columns correspond with the - elements in D. 
-*/ - - if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { - *k = 0; - if (*icompq == 0) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - perm[j] = indxq[indx[j]]; -/* L50: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - perm[j] = indxq[indx[j]]; - dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 - + 1], &c__1); -/* L60: */ - } - dlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); - } - return 0; - } - -/* - If there are multiple eigenvalues then the problem deflates. Here - the number of equal eigenvalues are found. As each equal - eigenvalue is found, an elementary reflector is computed to rotate - the corresponding eigensubspace so that the corresponding - components of Z are zero in this new basis. -*/ - - *k = 0; - *givptr = 0; - k2 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - indxp[k2] = j; - if (j == *n) { - goto L110; - } - } else { - jlam = j; - goto L80; - } -/* L70: */ - } -L80: - ++j; - if (j > *n) { - goto L100; - } - if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - indxp[k2] = j; - } else { - -/* Check if eigenvalues are close enough to allow deflation. */ - - s = z__[jlam]; - c__ = z__[j]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = dlapy2_(&c__, &s); - t = d__[j] - d__[jlam]; - c__ /= tau; - s = -s / tau; - if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { - -/* Deflation is possible. 
*/ - - z__[j] = tau; - z__[jlam] = 0.; - -/* Record the appropriate Givens rotation */ - - ++(*givptr); - givcol[((*givptr) << (1)) + 1] = indxq[indx[jlam]]; - givcol[((*givptr) << (1)) + 2] = indxq[indx[j]]; - givnum[((*givptr) << (1)) + 1] = c__; - givnum[((*givptr) << (1)) + 2] = s; - if (*icompq == 1) { - drot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ - indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); - } - t = d__[jlam] * c__ * c__ + d__[j] * s * s; - d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; - d__[jlam] = t; - --k2; - i__ = 1; -L90: - if (k2 + i__ <= *n) { - if (d__[jlam] < d__[indxp[k2 + i__]]) { - indxp[k2 + i__ - 1] = indxp[k2 + i__]; - indxp[k2 + i__] = jlam; - ++i__; - goto L90; - } else { - indxp[k2 + i__ - 1] = jlam; - } - } else { - indxp[k2 + i__ - 1] = jlam; - } - jlam = j; - } else { - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - jlam = j; - } - } - goto L80; -L100: - -/* Record the last eigenvalue. */ - - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - -L110: - -/* - Sort the eigenvalues and corresponding eigenvectors into DLAMDA - and Q2 respectively. The eigenvalues/vectors which were not - deflated go into the first K slots of DLAMDA and Q2 respectively, - while those which were deflated go into the last N - K slots. -*/ - - if (*icompq == 0) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - jp = indxp[j]; - dlamda[j] = d__[jp]; - perm[j] = indxq[indx[jp]]; -/* L120: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - jp = indxp[j]; - dlamda[j] = d__[jp]; - perm[j] = indxq[indx[jp]]; - dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] - , &c__1); -/* L130: */ - } - } - -/* - The deflated eigenvalues and their corresponding vectors go back - into the last N - K slots of D and Q respectively. 
-*/ - - if (*k < *n) { - if (*icompq == 0) { - i__1 = *n - *k; - dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); - } else { - i__1 = *n - *k; - dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); - i__1 = *n - *k; - dlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* - k + 1) * q_dim1 + 1], ldq); - } - } - - return 0; - -/* End of DLAED8 */ - -} /* dlaed8_ */ - -/* Subroutine */ int dlaed9_(integer *k, integer *kstart, integer *kstop, - integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * - rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, - integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer i__, j; - static doublereal temp; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dlaed4_(integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *); - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 - - - Purpose - ======= - - DLAED9 finds the roots of the secular equation, as defined by the - values in D, Z, and RHO, between KSTART and KSTOP. It makes the - appropriate calls to DLAED4 and then stores the new matrix of - eigenvectors for use in calculating the next level of Z vectors. - - Arguments - ========= - - K (input) INTEGER - The number of terms in the rational function to be solved by - DLAED4. K >= 0. 
- - KSTART (input) INTEGER - KSTOP (input) INTEGER - The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP - are to be computed. 1 <= KSTART <= KSTOP <= K. - - N (input) INTEGER - The number of rows and columns in the Q matrix. - N >= K (delation may result in N > K). - - D (output) DOUBLE PRECISION array, dimension (N) - D(I) contains the updated eigenvalues - for KSTART <= I <= KSTOP. - - Q (workspace) DOUBLE PRECISION array, dimension (LDQ,N) - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max( 1, N ). - - RHO (input) DOUBLE PRECISION - The value of the parameter in the rank one update equation. - RHO >= 0 required. - - DLAMDA (input) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. - - W (input) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the components - of the deflation-adjusted updating vector. - - S (output) DOUBLE PRECISION array, dimension (LDS, K) - Will contain the eigenvectors of the repaired matrix which - will be stored for subsequent Z vector calculation and - multiplied by the previously accumulated eigenvectors - to update the system. - - LDS (input) INTEGER - The leading dimension of S. LDS >= max( 1, K ). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --dlamda; - --w; - s_dim1 = *lds; - s_offset = 1 + s_dim1; - s -= s_offset; - - /* Function Body */ - *info = 0; - - if (*k < 0) { - *info = -1; - } else if ((*kstart < 1) || (*kstart > max(1,*k))) { - *info = -2; - } else if ((max(1,*kstop) < *kstart) || (*kstop > max(1,*k))) { - *info = -3; - } else if (*n < *k) { - *info = -4; - } else if (*ldq < max(1,*k)) { - *info = -7; - } else if (*lds < max(1,*k)) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED9", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*k == 0) { - return 0; - } - -/* - Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can - be computed with high relative accuracy (barring over/underflow). - This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). - The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), - which on any of these machines zeros out the bottommost - bit of DLAMDA(I) if it is 1; this makes the subsequent - subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DLAMDA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DLAMDA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. -*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; -/* L10: */ - } - - i__1 = *kstop; - for (j = *kstart; j <= i__1; ++j) { - dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], - info); - -/* If the zero finder fails, the computation is terminated. 
*/ - - if (*info != 0) { - goto L120; - } -/* L20: */ - } - - if ((*k == 1) || (*k == 2)) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *k; - for (j = 1; j <= i__2; ++j) { - s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; -/* L30: */ - } -/* L40: */ - } - goto L120; - } - -/* Compute updated W. */ - - dcopy_(k, &w[1], &c__1, &s[s_offset], &c__1); - -/* Initialize W(I) = Q(I,I) */ - - i__1 = *ldq + 1; - dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L50: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L60: */ - } -/* L70: */ - } - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - d__1 = sqrt(-w[i__]); - w[i__] = d_sign(&d__1, &s[i__ + s_dim1]); -/* L80: */ - } - -/* Compute eigenvectors of the modified rank-1 modification. */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; -/* L90: */ - } - temp = dnrm2_(k, &q[j * q_dim1 + 1], &c__1); - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; -/* L100: */ - } -/* L110: */ - } - -L120: - return 0; - -/* End of DLAED9 */ - -} /* dlaed9_ */ - -/* Subroutine */ int dlaeda_(integer *n, integer *tlvls, integer *curlvl, - integer *curpbm, integer *prmptr, integer *perm, integer *givptr, - integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, - doublereal *z__, doublereal *ztemp, integer *info) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - double sqrt(doublereal); - - /* Local variables */ - static integer i__, k, mid, ptr; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal 
*, doublereal *); - static integer curr, bsiz1, bsiz2, psiz1, psiz2, zptr1; - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), dcopy_(integer *, - doublereal *, integer *, doublereal *, integer *), xerbla_(char *, - integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DLAEDA computes the Z vector corresponding to the merge step in the - CURLVLth step of the merge process with TLVLS steps for the CURPBMth - problem. - - Arguments - ========= - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - TLVLS (input) INTEGER - The total number of merging levels in the overall divide and - conquer tree. - - CURLVL (input) INTEGER - The current level in the overall merge routine, - 0 <= curlvl <= tlvls. - - CURPBM (input) INTEGER - The current problem in the current level in the overall - merge routine (counting from upper left to lower right). - - PRMPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in PERM a - level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) - indicates the size of the permutation and incidentally the - size of the full, non-deflated problem. - - PERM (input) INTEGER array, dimension (N lg N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in GIVCOL a - level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) - indicates the number of Givens rotations. - - GIVCOL (input) INTEGER array, dimension (2, N lg N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. 
- - GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - Q (input) DOUBLE PRECISION array, dimension (N**2) - Contains the square eigenblocks from previous levels, the - starting positions for blocks are given by QPTR. - - QPTR (input) INTEGER array, dimension (N+2) - Contains a list of pointers which indicate where in Q an - eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates - the size of the block. - - Z (output) DOUBLE PRECISION array, dimension (N) - On output this vector contains the updating vector (the last - row of the first sub-eigenvector matrix and the first row of - the second sub-eigenvector matrix). - - ZTEMP (workspace) DOUBLE PRECISION array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --ztemp; - --z__; - --qptr; - --q; - givnum -= 3; - givcol -= 3; - --givptr; - --perm; - --prmptr; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -1; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAEDA", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine location of first number in second half. */ - - mid = *n / 2 + 1; - -/* Gather last/first rows of appropriate eigenblocks into center of Z */ - - ptr = 1; - -/* - Determine location of lowest level subproblem in the full storage - scheme -*/ - - i__1 = *curlvl - 1; - curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; - -/* - Determine size of these matrices. 
We add HALF to the value of - the SQRT in case the machine underestimates one of these square - roots. -*/ - - bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + .5); - bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])) + - .5); - i__1 = mid - bsiz1 - 1; - for (k = 1; k <= i__1; ++k) { - z__[k] = 0.; -/* L10: */ - } - dcopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & - c__1); - dcopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); - i__1 = *n; - for (k = mid + bsiz2; k <= i__1; ++k) { - z__[k] = 0.; -/* L20: */ - } - -/* - Loop thru remaining levels 1 -> CURLVL applying the Givens - rotations and permutation and then multiplying the center matrices - against the current Z. -*/ - - ptr = pow_ii(&c__2, tlvls) + 1; - i__1 = *curlvl - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = *curlvl - k; - i__3 = *curlvl - k - 1; - curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - - 1; - psiz1 = prmptr[curr + 1] - prmptr[curr]; - psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; - zptr1 = mid - psiz1; - -/* Apply Givens at CURR and CURR+1 */ - - i__2 = givptr[curr + 1] - 1; - for (i__ = givptr[curr]; i__ <= i__2; ++i__) { - drot_(&c__1, &z__[zptr1 + givcol[((i__) << (1)) + 1] - 1], &c__1, - &z__[zptr1 + givcol[((i__) << (1)) + 2] - 1], &c__1, & - givnum[((i__) << (1)) + 1], &givnum[((i__) << (1)) + 2]); -/* L30: */ - } - i__2 = givptr[curr + 2] - 1; - for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { - drot_(&c__1, &z__[mid - 1 + givcol[((i__) << (1)) + 1]], &c__1, & - z__[mid - 1 + givcol[((i__) << (1)) + 2]], &c__1, &givnum[ - ((i__) << (1)) + 1], &givnum[((i__) << (1)) + 2]); -/* L40: */ - } - psiz1 = prmptr[curr + 1] - prmptr[curr]; - psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; - i__2 = psiz1 - 1; - for (i__ = 0; i__ <= i__2; ++i__) { - ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; -/* L50: */ - } - i__2 = psiz2 - 1; - for (i__ = 0; i__ <= i__2; ++i__) { - ztemp[psiz1 + i__ + 1] = 
z__[mid + perm[prmptr[curr + 1] + i__] - - 1]; -/* L60: */ - } - -/* - Multiply Blocks at CURR and CURR+1 - - Determine size of these matrices. We add HALF to the value of - the SQRT in case the machine underestimates one of these - square roots. -*/ - - bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + - .5); - bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1]) - ) + .5); - if (bsiz1 > 0) { - dgemv_("T", &bsiz1, &bsiz1, &c_b2865, &q[qptr[curr]], &bsiz1, & - ztemp[1], &c__1, &c_b2879, &z__[zptr1], &c__1); - } - i__2 = psiz1 - bsiz1; - dcopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); - if (bsiz2 > 0) { - dgemv_("T", &bsiz2, &bsiz2, &c_b2865, &q[qptr[curr + 1]], &bsiz2, - &ztemp[psiz1 + 1], &c__1, &c_b2879, &z__[mid], &c__1); - } - i__2 = psiz2 - bsiz2; - dcopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & - c__1); - - i__2 = *tlvls - k; - ptr += pow_ii(&c__2, &i__2); -/* L70: */ - } - - return 0; - -/* End of DLAEDA */ - -} /* dlaeda_ */ - -/* Subroutine */ int dlaev2_(doublereal *a, doublereal *b, doublereal *c__, - doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1) -{ - /* System generated locals */ - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal ab, df, cs, ct, tb, sm, tn, rt, adf, acs; - static integer sgn1, sgn2; - static doublereal acmn, acmx; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix - [ A B ] - [ B C ]. 
- On return, RT1 is the eigenvalue of larger absolute value, RT2 is the - eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right - eigenvector for RT1, giving the decomposition - - [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] - [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. - - Arguments - ========= - - A (input) DOUBLE PRECISION - The (1,1) element of the 2-by-2 matrix. - - B (input) DOUBLE PRECISION - The (1,2) element and the conjugate of the (2,1) element of - the 2-by-2 matrix. - - C (input) DOUBLE PRECISION - The (2,2) element of the 2-by-2 matrix. - - RT1 (output) DOUBLE PRECISION - The eigenvalue of larger absolute value. - - RT2 (output) DOUBLE PRECISION - The eigenvalue of smaller absolute value. - - CS1 (output) DOUBLE PRECISION - SN1 (output) DOUBLE PRECISION - The vector (CS1, SN1) is a unit right eigenvector for RT1. - - Further Details - =============== - - RT1 is accurate to a few ulps barring over/underflow. - - RT2 may be inaccurate if there is massive cancellation in the - determinant A*C-B*B; higher precision or correctly rounded or - correctly truncated arithmetic would be needed to compute RT2 - accurately in all cases. - - CS1 and SN1 are accurate to a few ulps barring over/underflow. - - Overflow is possible only if RT1 is within a factor of 5 of overflow. - Underflow is harmless if the input data is 0 or exceeds - underflow_threshold / macheps. - - ===================================================================== - - - Compute the eigenvalues -*/ - - sm = *a + *c__; - df = *a - *c__; - adf = abs(df); - tb = *b + *b; - ab = abs(tb); - if (abs(*a) > abs(*c__)) { - acmx = *a; - acmn = *c__; - } else { - acmx = *c__; - acmn = *a; - } - if (adf > ab) { -/* Computing 2nd power */ - d__1 = ab / adf; - rt = adf * sqrt(d__1 * d__1 + 1.); - } else if (adf < ab) { -/* Computing 2nd power */ - d__1 = adf / ab; - rt = ab * sqrt(d__1 * d__1 + 1.); - } else { - -/* Includes case AB=ADF=0 */ - - rt = ab * sqrt(2.); - } - if (sm < 0.) 
{ - *rt1 = (sm - rt) * .5; - sgn1 = -1; - -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else if (sm > 0.) { - *rt1 = (sm + rt) * .5; - sgn1 = 1; - -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ - - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else { - -/* Includes case RT1 = RT2 = 0 */ - - *rt1 = rt * .5; - *rt2 = rt * -.5; - sgn1 = 1; - } - -/* Compute the eigenvector */ - - if (df >= 0.) { - cs = df + rt; - sgn2 = 1; - } else { - cs = df - rt; - sgn2 = -1; - } - acs = abs(cs); - if (acs > ab) { - ct = -tb / cs; - *sn1 = 1. / sqrt(ct * ct + 1.); - *cs1 = ct * *sn1; - } else { - if (ab == 0.) { - *cs1 = 1.; - *sn1 = 0.; - } else { - tn = -cs / tb; - *cs1 = 1. / sqrt(tn * tn + 1.); - *sn1 = tn * *cs1; - } - } - if (sgn1 == sgn2) { - tn = *cs1; - *cs1 = -(*sn1); - *sn1 = tn; - } - return 0; - -/* End of DLAEV2 */ - -} /* dlaev2_ */ - -/* Subroutine */ int dlahqr_(logical *wantt, logical *wantz, integer *n, - integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal - *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, - integer *ldz, integer *info) -{ - /* System generated locals */ - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer i__, j, k, l, m; - static doublereal s, v[3]; - static integer i1, i2; - static doublereal t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, - h33, h44; - static integer nh; - static doublereal cs; - static integer nr; - static doublereal sn; - static integer nz; - static doublereal ave, h33s, h44s; - static integer itn, its; - static doublereal ulp, sum, tst1, h43h34, disc, unfl, ovfl; - extern /* 
Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static doublereal work[1]; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dlanv2_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *), dlabad_( - doublereal *, doublereal *); - - extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, - integer *, doublereal *); - extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, - doublereal *); - static doublereal smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLAHQR is an auxiliary routine called by DHSEQR to update the - eigenvalues and Schur decomposition already computed by DHSEQR, by - dealing with the Hessenberg submatrix in rows and columns ILO to IHI. - - Arguments - ========= - - WANTT (input) LOGICAL - = .TRUE. : the full Schur form T is required; - = .FALSE.: only eigenvalues are required. - - WANTZ (input) LOGICAL - = .TRUE. : the matrix of Schur vectors Z is required; - = .FALSE.: Schur vectors are not required. - - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper quasi-triangular in - rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless - ILO = 1). DLAHQR works primarily with the Hessenberg - submatrix in rows and columns ILO to IHI, but applies - transformations to all of H if WANTT is .TRUE.. - 1 <= ILO <= max(1,IHI); IHI <= N. - - H (input/output) DOUBLE PRECISION array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. 
- On exit, if WANTT is .TRUE., H is upper quasi-triangular in - rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in - standard form. If WANTT is .FALSE., the contents of H are - unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - WR (output) DOUBLE PRECISION array, dimension (N) - WI (output) DOUBLE PRECISION array, dimension (N) - The real and imaginary parts, respectively, of the computed - eigenvalues ILO to IHI are stored in the corresponding - elements of WR and WI. If two eigenvalues are computed as a - complex conjugate pair, they are stored in consecutive - elements of WR and WI, say the i-th and (i+1)th, with - WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the - eigenvalues are stored in the same order as on the diagonal - of the Schur form returned in H, with WR(i) = H(i,i), and, if - H(i:i+1,i:i+1) is a 2-by-2 diagonal block, - WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). - - ILOZ (input) INTEGER - IHIZ (input) INTEGER - Specify the rows of Z to which transformations must be - applied if WANTZ is .TRUE.. - 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. - - Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) - If WANTZ is .TRUE., on entry Z must contain the current - matrix Z of transformations accumulated by DHSEQR, and on - exit Z has been updated; transformations are applied only to - the submatrix Z(ILOZ:IHIZ,ILO:IHI). - If WANTZ is .FALSE., Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - > 0: DLAHQR failed to compute all the eigenvalues ILO to IHI - in a total of 30*(IHI-ILO+1) iterations; if INFO = i, - elements i+1:ihi of WR and WI contain those eigenvalues - which have been successfully computed. 
- - Further Details - =============== - - 2-96 Based on modifications by - David Day, Sandia National Laboratory, USA - - ===================================================================== -*/ - - - /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --wr; - --wi; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - - /* Function Body */ - *info = 0; - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - wr[*ilo] = h__[*ilo + *ilo * h_dim1]; - wi[*ilo] = 0.; - return 0; - } - - nh = *ihi - *ilo + 1; - nz = *ihiz - *iloz + 1; - -/* - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (nh / ulp); - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. -*/ - - if (*wantt) { - i1 = 1; - i2 = *n; - } - -/* ITN is the total number of QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of 1 or 2. Each iteration of the loop works - with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO or - H(L,L-1) is negligible so that the matrix splits. -*/ - - i__ = *ihi; -L10: - l = *ilo; - if (i__ < *ilo) { - goto L150; - } - -/* - Perform QR iterations on rows and columns ILO to I until a - submatrix of order 1 or 2 splits off at the bottom because a - subdiagonal element has become negligible. -*/ - - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. 
*/ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = - h__[k + k * h_dim1], abs(d__2)); - if (tst1 == 0.) { - i__3 = i__ - l + 1; - tst1 = dlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work); - } -/* Computing MAX */ - d__2 = ulp * tst1; - if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, - smlnum)) { - goto L30; - } -/* L20: */ - } -L30: - l = k; - if (l > *ilo) { - -/* H(L,L-1) is negligible */ - - h__[l + (l - 1) * h_dim1] = 0.; - } - -/* Exit from loop if a submatrix of order 1 or 2 has split off. */ - - if (l >= i__ - 1) { - goto L140; - } - -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! (*wantt)) { - i1 = l; - i2 = i__; - } - - if ((its == 10) || (its == 20)) { - -/* Exceptional shift. */ - - s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + (d__2 = - h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); - h44 = s * .75 + h__[i__ + i__ * h_dim1]; - h33 = h44; - h43h34 = s * -.4375 * s; - } else { - -/* - Prepare to use Francis' double shift - (i.e. 2nd degree generalized Rayleigh quotient) -*/ - - h44 = h__[i__ + i__ * h_dim1]; - h33 = h__[i__ - 1 + (i__ - 1) * h_dim1]; - h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * - h_dim1]; - s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) * - h_dim1]; - disc = (h33 - h44) * .5; - disc = disc * disc + h43h34; - if (disc > 0.) { - -/* Real roots: use Wilkinson's shift twice */ - - disc = sqrt(disc); - ave = (h33 + h44) * .5; - if (abs(h33) - abs(h44) > 0.) { - h33 = h33 * h44 - h43h34; - h44 = h33 / (d_sign(&disc, &ave) + ave); - } else { - h44 = d_sign(&disc, &ave) + ave; - } - h33 = h44; - h43h34 = 0.; - } - } - -/* Look for two consecutive small subdiagonal elements. 
*/ - - i__2 = l; - for (m = i__ - 2; m >= i__2; --m) { -/* - Determine the effect of starting the double-shift QR - iteration at row M, and see if this would make H(M,M-1) - negligible. -*/ - - h11 = h__[m + m * h_dim1]; - h22 = h__[m + 1 + (m + 1) * h_dim1]; - h21 = h__[m + 1 + m * h_dim1]; - h12 = h__[m + (m + 1) * h_dim1]; - h44s = h44 - h11; - h33s = h33 - h11; - v1 = (h33s * h44s - h43h34) / h21 + h12; - v2 = h22 - h11 - h33s - h44s; - v3 = h__[m + 2 + (m + 1) * h_dim1]; - s = abs(v1) + abs(v2) + abs(v3); - v1 /= s; - v2 /= s; - v3 /= s; - v[0] = v1; - v[1] = v2; - v[2] = v3; - if (m == l) { - goto L50; - } - h00 = h__[m - 1 + (m - 1) * h_dim1]; - h10 = h__[m + (m - 1) * h_dim1]; - tst1 = abs(v1) * (abs(h00) + abs(h11) + abs(h22)); - if (abs(h10) * (abs(v2) + abs(v3)) <= ulp * tst1) { - goto L50; - } -/* L40: */ - } -L50: - -/* Double-shift QR step */ - - i__2 = i__ - 1; - for (k = m; k <= i__2; ++k) { - -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. - - Computing MIN -*/ - i__3 = 3, i__4 = i__ - k + 1; - nr = min(i__3,i__4); - if (k > m) { - dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - dlarfg_(&nr, v, &v[1], &c__1, &t1); - if (k > m) { - h__[k + (k - 1) * h_dim1] = v[0]; - h__[k + 1 + (k - 1) * h_dim1] = 0.; - if (k < i__ - 1) { - h__[k + 2 + (k - 1) * h_dim1] = 0.; - } - } else if (m > l) { - h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1]; - } - v2 = v[1]; - t2 = t1 * v2; - if (nr == 3) { - v3 = v[2]; - t3 = t1 * v3; - -/* - Apply G from the left to transform the rows of the matrix - in columns K to I2. 
-*/ - - i__3 = i2; - for (j = k; j <= i__3; ++j) { - sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] - + v3 * h__[k + 2 + j * h_dim1]; - h__[k + j * h_dim1] -= sum * t1; - h__[k + 1 + j * h_dim1] -= sum * t2; - h__[k + 2 + j * h_dim1] -= sum * t3; -/* L60: */ - } - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+3,I). - - Computing MIN -*/ - i__4 = k + 3; - i__3 = min(i__4,i__); - for (j = i1; j <= i__3; ++j) { - sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] - + v3 * h__[j + (k + 2) * h_dim1]; - h__[j + k * h_dim1] -= sum * t1; - h__[j + (k + 1) * h_dim1] -= sum * t2; - h__[j + (k + 2) * h_dim1] -= sum * t3; -/* L70: */ - } - - if (*wantz) { - -/* Accumulate transformations in the matrix Z */ - - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * - z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; - z__[j + k * z_dim1] -= sum * t1; - z__[j + (k + 1) * z_dim1] -= sum * t2; - z__[j + (k + 2) * z_dim1] -= sum * t3; -/* L80: */ - } - } - } else if (nr == 2) { - -/* - Apply G from the left to transform the rows of the matrix - in columns K to I2. -*/ - - i__3 = i2; - for (j = k; j <= i__3; ++j) { - sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; - h__[k + j * h_dim1] -= sum * t1; - h__[k + 1 + j * h_dim1] -= sum * t2; -/* L90: */ - } - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+3,I). 
-*/ - - i__3 = i__; - for (j = i1; j <= i__3; ++j) { - sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] - ; - h__[j + k * h_dim1] -= sum * t1; - h__[j + (k + 1) * h_dim1] -= sum * t2; -/* L100: */ - } - - if (*wantz) { - -/* Accumulate transformations in the matrix Z */ - - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * - z_dim1]; - z__[j + k * z_dim1] -= sum * t1; - z__[j + (k + 1) * z_dim1] -= sum * t2; -/* L110: */ - } - } - } -/* L120: */ - } - -/* L130: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L140: - - if (l == i__) { - -/* H(I,I-1) is negligible: one eigenvalue has converged. */ - - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.; - } else if (l == i__ - 1) { - -/* - H(I-1,I-2) is negligible: a pair of eigenvalues have converged. - - Transform the 2-by-2 submatrix to standard Schur form, - and compute and store the eigenvalues. -*/ - - dlanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * - h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * - h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, - &sn); - - if (*wantt) { - -/* Apply the transformation to the rest of H. */ - - if (i2 > i__) { - i__1 = i2 - i__; - drot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ - i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); - } - i__1 = i__ - i1 - 1; - drot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * - h_dim1], &c__1, &cs, &sn); - } - if (*wantz) { - -/* Apply the transformation to Z. */ - - drot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + - i__ * z_dim1], &c__1, &cs, &sn); - } - } - -/* - Decrement number of remaining iterations, and return to start of - the main loop with new value of I. 
-*/ - - itn -= its; - i__ = l - 1; - goto L10; - -L150: - return 0; - -/* End of DLAHQR */ - -} /* dlahqr_ */ - -/* Subroutine */ int dlahrd_(integer *n, integer *k, integer *nb, doublereal * - a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, - doublereal *y, integer *ldy) -{ - /* System generated locals */ - integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, - i__3; - doublereal d__1; - - /* Local variables */ - static integer i__; - static doublereal ei; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dgemv_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dcopy_(integer *, doublereal *, - integer *, doublereal *, integer *), daxpy_(integer *, doublereal - *, doublereal *, integer *, doublereal *, integer *), dtrmv_(char - *, char *, char *, integer *, doublereal *, integer *, doublereal - *, integer *), dlarfg_(integer *, - doublereal *, doublereal *, integer *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLAHRD reduces the first NB columns of a real general n-by-(n-k+1) - matrix A so that elements below the k-th subdiagonal are zero. The - reduction is performed by an orthogonal similarity transformation - Q' * A * Q. The routine returns the matrices V and T which determine - Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. - - This is an auxiliary routine called by DGEHRD. - - Arguments - ========= - - N (input) INTEGER - The order of the matrix A. - - K (input) INTEGER - The offset for the reduction. Elements below the k-th - subdiagonal in the first NB columns are reduced to zero. - - NB (input) INTEGER - The number of columns to be reduced. 
- - A (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1) - On entry, the n-by-(n-k+1) general matrix A. - On exit, the elements on and above the k-th subdiagonal in - the first NB columns are overwritten with the corresponding - elements of the reduced matrix; the elements below the k-th - subdiagonal, with the array TAU, represent the matrix Q as a - product of elementary reflectors. The other columns of A are - unchanged. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (output) DOUBLE PRECISION array, dimension (NB) - The scalar factors of the elementary reflectors. See Further - Details. - - T (output) DOUBLE PRECISION array, dimension (LDT,NB) - The upper triangular matrix T. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= NB. - - Y (output) DOUBLE PRECISION array, dimension (LDY,NB) - The n-by-nb matrix Y. - - LDY (input) INTEGER - The leading dimension of the array Y. LDY >= N. - - Further Details - =============== - - The matrix Q is represented as a product of nb elementary reflectors - - Q = H(1) H(2) . . . H(nb). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in - A(i+k+1:n,i), and tau in TAU(i). - - The elements of the vectors v together form the (n-k+1)-by-nb matrix - V which is needed, with T and Y, to apply the transformation to the - unreduced part of the matrix, using an update of the form: - A := (I - V*T*V') * (A - Y*V'). - - The contents of A on exit are illustrated by the following example - with n = 7, k = 3 and nb = 2: - - ( a h a a a ) - ( a h a a a ) - ( a h a a a ) - ( h h a a a ) - ( v1 h a a a ) - ( v1 v2 a a a ) - ( v1 v2 a a a ) - - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). 
- - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - --tau; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; - - /* Function Body */ - if (*n <= 1) { - return 0; - } - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ > 1) { - -/* - Update A(1:n,i) - - Compute i-th column of A - Y * V' -*/ - - i__2 = i__ - 1; - dgemv_("No transpose", n, &i__2, &c_b3001, &y[y_offset], ldy, &a[* - k + i__ - 1 + a_dim1], lda, &c_b2865, &a[i__ * a_dim1 + 1] - , &c__1); - -/* - Apply I - V * T' * V' to this column (call it b) from the - left, using the last column of T as workspace - - Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) - ( V2 ) ( b2 ) - - where V1 is unit lower triangular - - w := V1' * b1 -*/ - - i__2 = i__ - 1; - dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + - 1], &c__1); - i__2 = i__ - 1; - dtrmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], - lda, &t[*nb * t_dim1 + 1], &c__1); - -/* w := w + V2'*b2 */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[*k + i__ + a_dim1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2865, &t[* - nb * t_dim1 + 1], &c__1); - -/* w := T'*w */ - - i__2 = i__ - 1; - dtrmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, - &t[*nb * t_dim1 + 1], &c__1); - -/* b2 := b2 - V2*w */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[*k + i__ + - a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b2865, &a[* - k + i__ + i__ * a_dim1], &c__1); - -/* b1 := b1 - V1*w */ - - i__2 = i__ - 1; - dtrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] - , lda, &t[*nb * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - daxpy_(&i__2, &c_b3001, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + - i__ * 
a_dim1], &c__1); - - a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; - } - -/* - Generate the elementary reflector H(i) to annihilate - A(k+i+1:n,i) -*/ - - i__2 = *n - *k - i__ + 1; -/* Computing MIN */ - i__3 = *k + i__ + 1; - dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * - a_dim1], &c__1, &tau[i__]); - ei = a[*k + i__ + i__ * a_dim1]; - a[*k + i__ + i__ * a_dim1] = 1.; - -/* Compute Y(1:n,i) */ - - i__2 = *n - *k - i__ + 1; - dgemv_("No transpose", n, &i__2, &c_b2865, &a[(i__ + 1) * a_dim1 + 1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2879, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[*k + i__ + a_dim1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2879, &t[i__ * - t_dim1 + 1], &c__1); - i__2 = i__ - 1; - dgemv_("No transpose", n, &i__2, &c_b3001, &y[y_offset], ldy, &t[i__ * - t_dim1 + 1], &c__1, &c_b2865, &y[i__ * y_dim1 + 1], &c__1); - dscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); - -/* Compute T(1:i,i) */ - - i__2 = i__ - 1; - d__1 = -tau[i__]; - dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, - &t[i__ * t_dim1 + 1], &c__1) - ; - t[i__ + i__ * t_dim1] = tau[i__]; - -/* L10: */ - } - a[*k + *nb + *nb * a_dim1] = ei; - - return 0; - -/* End of DLAHRD */ - -} /* dlahrd_ */ - -/* Subroutine */ int dlaln2_(logical *ltrans, integer *na, integer *nw, - doublereal *smin, doublereal *ca, doublereal *a, integer *lda, - doublereal *d1, doublereal *d2, doublereal *b, integer *ldb, - doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, - doublereal *scale, doublereal *xnorm, integer *info) -{ - /* Initialized data */ - - static logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; - static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; - static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, - 4,3,2,1 }; - - /* System generated locals */ - 
integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; - doublereal d__1, d__2, d__3, d__4, d__5, d__6; - static doublereal equiv_0[4], equiv_1[4]; - - /* Local variables */ - static integer j; -#define ci (equiv_0) -#define cr (equiv_1) - static doublereal bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, - cr21, cr22, li21, csi, ui11, lr21, ui12, ui22; -#define civ (equiv_0) - static doublereal csr, ur11, ur12, ur22; -#define crv (equiv_1) - static doublereal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; - static integer icmax; - static doublereal bnorm, cnorm, smini; - - extern /* Subroutine */ int dladiv_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *); - static doublereal bignum, smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLALN2 solves a system of the form (ca A - w D ) X = s B - or (ca A' - w D) X = s B with possible scaling ("s") and - perturbation of A. (A' means A-transpose.) - - A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA - real diagonal matrix, w is a real or complex value, and X and B are - NA x 1 matrices -- real if w is real, complex if w is complex. NA - may be 1 or 2. - - If w is complex, X and B are represented as NA x 2 matrices, - the first column of each being the real part and the second - being the imaginary part. - - "s" is a scaling factor (.LE. 1), computed by DLALN2, which is - so chosen that X can be computed without overflow. X is further - scaled if necessary to assure that norm(ca A - w D)*norm(X) is less - than overflow. - - If both singular values of (ca A - w D) are less than SMIN, - SMIN*identity will be used instead of (ca A - w D). 
If only one - singular value is less than SMIN, one element of (ca A - w D) will be - perturbed enough to make the smallest singular value roughly SMIN. - If both singular values are at least SMIN, (ca A - w D) will not be - perturbed. In any case, the perturbation will be at most some small - multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values - are computed by infinity-norm approximations, and thus will only be - correct to a factor of 2 or so. - - Note: all input quantities are assumed to be smaller than overflow - by a reasonable factor. (See BIGNUM.) - - Arguments - ========== - - LTRANS (input) LOGICAL - =.TRUE.: A-transpose will be used. - =.FALSE.: A will be used (not transposed.) - - NA (input) INTEGER - The size of the matrix A. It may (only) be 1 or 2. - - NW (input) INTEGER - 1 if "w" is real, 2 if "w" is complex. It may only be 1 - or 2. - - SMIN (input) DOUBLE PRECISION - The desired lower bound on the singular values of A. This - should be a safe distance away from underflow or overflow, - say, between (underflow/machine precision) and (machine - precision * overflow ). (See BIGNUM and ULP.) - - CA (input) DOUBLE PRECISION - The coefficient c, which A is multiplied by. - - A (input) DOUBLE PRECISION array, dimension (LDA,NA) - The NA x NA matrix A. - - LDA (input) INTEGER - The leading dimension of A. It must be at least NA. - - D1 (input) DOUBLE PRECISION - The 1,1 element in the diagonal matrix D. - - D2 (input) DOUBLE PRECISION - The 2,2 element in the diagonal matrix D. Not used if NW=1. - - B (input) DOUBLE PRECISION array, dimension (LDB,NW) - The NA x NW matrix B (right-hand side). If NW=2 ("w" is - complex), column 1 contains the real part of B and column 2 - contains the imaginary part. - - LDB (input) INTEGER - The leading dimension of B. It must be at least NA. - - WR (input) DOUBLE PRECISION - The real part of the scalar "w". - - WI (input) DOUBLE PRECISION - The imaginary part of the scalar "w". Not used if NW=1. 
- - X (output) DOUBLE PRECISION array, dimension (LDX,NW) - The NA x NW matrix X (unknowns), as computed by DLALN2. - If NW=2 ("w" is complex), on exit, column 1 will contain - the real part of X and column 2 will contain the imaginary - part. - - LDX (input) INTEGER - The leading dimension of X. It must be at least NA. - - SCALE (output) DOUBLE PRECISION - The scale factor that B must be multiplied by to insure - that overflow does not occur when computing X. Thus, - (ca A - w D) X will be SCALE*B, not B (ignoring - perturbations of A.) It will be at most 1. - - XNORM (output) DOUBLE PRECISION - The infinity-norm of X, when X is regarded as an NA x NW - real matrix. - - INFO (output) INTEGER - An error flag. It will be set to zero if no error occurs, - a negative number if an argument is in error, or a positive - number if ca A - w D had to be perturbed. - The possible values are: - = 0: No error occurred, and (ca A - w D) did not have to be - perturbed. - = 1: (ca A - w D) had to be perturbed to make its smallest - (or only) singular value greater than SMIN. - NOTE: In the interests of speed, this routine does not - check the inputs for errors. - - ===================================================================== -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - x_dim1 = *ldx; - x_offset = 1 + x_dim1; - x -= x_offset; - - /* Function Body */ - -/* Compute BIGNUM */ - - smlnum = 2. * SAFEMINIMUM; - bignum = 1. / smlnum; - smini = max(*smin,smlnum); - -/* Don't check for input errors */ - - *info = 0; - -/* Standard Initializations */ - - *scale = 1.; - - if (*na == 1) { - -/* 1 x 1 (i.e., scalar) system C X = B */ - - if (*nw == 1) { - -/* - Real 1x1 system. 
- - C = ca A - w D -*/ - - csr = *ca * a[a_dim1 + 1] - *wr * *d1; - cnorm = abs(csr); - -/* If | C | < SMINI, use C = SMINI */ - - if (cnorm < smini) { - csr = smini; - cnorm = smini; - *info = 1; - } - -/* Check scaling for X = B / C */ - - bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)); - if (cnorm < 1. && bnorm > 1.) { - if (bnorm > bignum * cnorm) { - *scale = 1. / bnorm; - } - } - -/* Compute X */ - - x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; - *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); - } else { - -/* - Complex 1x1 system (w is complex) - - C = ca A - w D -*/ - - csr = *ca * a[a_dim1 + 1] - *wr * *d1; - csi = -(*wi) * *d1; - cnorm = abs(csr) + abs(csi); - -/* If | C | < SMINI, use C = SMINI */ - - if (cnorm < smini) { - csr = smini; - csi = 0.; - cnorm = smini; - *info = 1; - } - -/* Check scaling for X = B / C */ - - bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[((b_dim1) << - (1)) + 1], abs(d__2)); - if (cnorm < 1. && bnorm > 1.) { - if (bnorm > bignum * cnorm) { - *scale = 1. / bnorm; - } - } - -/* Compute X */ - - d__1 = *scale * b[b_dim1 + 1]; - d__2 = *scale * b[((b_dim1) << (1)) + 1]; - dladiv_(&d__1, &d__2, &csr, &csi, &x[x_dim1 + 1], &x[((x_dim1) << - (1)) + 1]); - *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[((x_dim1) - << (1)) + 1], abs(d__2)); - } - - } else { - -/* - 2x2 System - - Compute the real part of C = ca A - w D (or ca A' - w D ) -*/ - - cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; - cr[3] = *ca * a[((a_dim1) << (1)) + 2] - *wr * *d2; - if (*ltrans) { - cr[2] = *ca * a[a_dim1 + 2]; - cr[1] = *ca * a[((a_dim1) << (1)) + 1]; - } else { - cr[1] = *ca * a[a_dim1 + 2]; - cr[2] = *ca * a[((a_dim1) << (1)) + 1]; - } - - if (*nw == 1) { - -/* - Real 2x2 system (w is real) - - Find the largest element in C -*/ - - cmax = 0.; - icmax = 0; - - for (j = 1; j <= 4; ++j) { - if ((d__1 = crv[j - 1], abs(d__1)) > cmax) { - cmax = (d__1 = crv[j - 1], abs(d__1)); - icmax = j; - } -/* L10: */ - } - -/* If norm(C) < SMINI, use SMINI*identity. 
*/ - - if (cmax < smini) { -/* Computing MAX */ - d__3 = (d__1 = b[b_dim1 + 1], abs(d__1)), d__4 = (d__2 = b[ - b_dim1 + 2], abs(d__2)); - bnorm = max(d__3,d__4); - if (smini < 1. && bnorm > 1.) { - if (bnorm > bignum * smini) { - *scale = 1. / bnorm; - } - } - temp = *scale / smini; - x[x_dim1 + 1] = temp * b[b_dim1 + 1]; - x[x_dim1 + 2] = temp * b[b_dim1 + 2]; - *xnorm = temp * bnorm; - *info = 1; - return 0; - } - -/* Gaussian elimination with complete pivoting. */ - - ur11 = crv[icmax - 1]; - cr21 = crv[ipivot[((icmax) << (2)) - 3] - 1]; - ur12 = crv[ipivot[((icmax) << (2)) - 2] - 1]; - cr22 = crv[ipivot[((icmax) << (2)) - 1] - 1]; - ur11r = 1. / ur11; - lr21 = ur11r * cr21; - ur22 = cr22 - ur12 * lr21; - -/* If smaller pivot < SMINI, use SMINI */ - - if (abs(ur22) < smini) { - ur22 = smini; - *info = 1; - } - if (rswap[icmax - 1]) { - br1 = b[b_dim1 + 2]; - br2 = b[b_dim1 + 1]; - } else { - br1 = b[b_dim1 + 1]; - br2 = b[b_dim1 + 2]; - } - br2 -= lr21 * br1; -/* Computing MAX */ - d__2 = (d__1 = br1 * (ur22 * ur11r), abs(d__1)), d__3 = abs(br2); - bbnd = max(d__2,d__3); - if (bbnd > 1. && abs(ur22) < 1.) { - if (bbnd >= bignum * abs(ur22)) { - *scale = 1. / bbnd; - } - } - - xr2 = br2 * *scale / ur22; - xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); - if (zswap[icmax - 1]) { - x[x_dim1 + 1] = xr2; - x[x_dim1 + 2] = xr1; - } else { - x[x_dim1 + 1] = xr1; - x[x_dim1 + 2] = xr2; - } -/* Computing MAX */ - d__1 = abs(xr1), d__2 = abs(xr2); - *xnorm = max(d__1,d__2); - -/* Further scaling if norm(A) norm(X) > overflow */ - - if (*xnorm > 1. && cmax > 1.) 
{ - if (*xnorm > bignum / cmax) { - temp = cmax / bignum; - x[x_dim1 + 1] = temp * x[x_dim1 + 1]; - x[x_dim1 + 2] = temp * x[x_dim1 + 2]; - *xnorm = temp * *xnorm; - *scale = temp * *scale; - } - } - } else { - -/* - Complex 2x2 system (w is complex) - - Find the largest element in C -*/ - - ci[0] = -(*wi) * *d1; - ci[1] = 0.; - ci[2] = 0.; - ci[3] = -(*wi) * *d2; - cmax = 0.; - icmax = 0; - - for (j = 1; j <= 4; ++j) { - if ((d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs( - d__2)) > cmax) { - cmax = (d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1] - , abs(d__2)); - icmax = j; - } -/* L20: */ - } - -/* If norm(C) < SMINI, use SMINI*identity. */ - - if (cmax < smini) { -/* Computing MAX */ - d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[((b_dim1) - << (1)) + 1], abs(d__2)), d__6 = (d__3 = b[b_dim1 + - 2], abs(d__3)) + (d__4 = b[((b_dim1) << (1)) + 2], - abs(d__4)); - bnorm = max(d__5,d__6); - if (smini < 1. && bnorm > 1.) { - if (bnorm > bignum * smini) { - *scale = 1. / bnorm; - } - } - temp = *scale / smini; - x[x_dim1 + 1] = temp * b[b_dim1 + 1]; - x[x_dim1 + 2] = temp * b[b_dim1 + 2]; - x[((x_dim1) << (1)) + 1] = temp * b[((b_dim1) << (1)) + 1]; - x[((x_dim1) << (1)) + 2] = temp * b[((b_dim1) << (1)) + 2]; - *xnorm = temp * bnorm; - *info = 1; - return 0; - } - -/* Gaussian elimination with complete pivoting. */ - - ur11 = crv[icmax - 1]; - ui11 = civ[icmax - 1]; - cr21 = crv[ipivot[((icmax) << (2)) - 3] - 1]; - ci21 = civ[ipivot[((icmax) << (2)) - 3] - 1]; - ur12 = crv[ipivot[((icmax) << (2)) - 2] - 1]; - ui12 = civ[ipivot[((icmax) << (2)) - 2] - 1]; - cr22 = crv[ipivot[((icmax) << (2)) - 1] - 1]; - ci22 = civ[ipivot[((icmax) << (2)) - 1] - 1]; - if ((icmax == 1) || (icmax == 4)) { - -/* Code when off-diagonals of pivoted C are real */ - - if (abs(ur11) > abs(ui11)) { - temp = ui11 / ur11; -/* Computing 2nd power */ - d__1 = temp; - ur11r = 1. 
/ (ur11 * (d__1 * d__1 + 1.)); - ui11r = -temp * ur11r; - } else { - temp = ur11 / ui11; -/* Computing 2nd power */ - d__1 = temp; - ui11r = -1. / (ui11 * (d__1 * d__1 + 1.)); - ur11r = -temp * ui11r; - } - lr21 = cr21 * ur11r; - li21 = cr21 * ui11r; - ur12s = ur12 * ur11r; - ui12s = ur12 * ui11r; - ur22 = cr22 - ur12 * lr21; - ui22 = ci22 - ur12 * li21; - } else { - -/* Code when diagonals of pivoted C are real */ - - ur11r = 1. / ur11; - ui11r = 0.; - lr21 = cr21 * ur11r; - li21 = ci21 * ur11r; - ur12s = ur12 * ur11r; - ui12s = ui12 * ur11r; - ur22 = cr22 - ur12 * lr21 + ui12 * li21; - ui22 = -ur12 * li21 - ui12 * lr21; - } - u22abs = abs(ur22) + abs(ui22); - -/* If smaller pivot < SMINI, use SMINI */ - - if (u22abs < smini) { - ur22 = smini; - ui22 = 0.; - *info = 1; - } - if (rswap[icmax - 1]) { - br2 = b[b_dim1 + 1]; - br1 = b[b_dim1 + 2]; - bi2 = b[((b_dim1) << (1)) + 1]; - bi1 = b[((b_dim1) << (1)) + 2]; - } else { - br1 = b[b_dim1 + 1]; - br2 = b[b_dim1 + 2]; - bi1 = b[((b_dim1) << (1)) + 1]; - bi2 = b[((b_dim1) << (1)) + 2]; - } - br2 = br2 - lr21 * br1 + li21 * bi1; - bi2 = bi2 - li21 * br1 - lr21 * bi1; -/* Computing MAX */ - d__1 = (abs(br1) + abs(bi1)) * (u22abs * (abs(ur11r) + abs(ui11r)) - ), d__2 = abs(br2) + abs(bi2); - bbnd = max(d__1,d__2); - if (bbnd > 1. && u22abs < 1.) { - if (bbnd >= bignum * u22abs) { - *scale = 1. 
/ bbnd; - br1 = *scale * br1; - bi1 = *scale * bi1; - br2 = *scale * br2; - bi2 = *scale * bi2; - } - } - - dladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); - xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; - xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; - if (zswap[icmax - 1]) { - x[x_dim1 + 1] = xr2; - x[x_dim1 + 2] = xr1; - x[((x_dim1) << (1)) + 1] = xi2; - x[((x_dim1) << (1)) + 2] = xi1; - } else { - x[x_dim1 + 1] = xr1; - x[x_dim1 + 2] = xr2; - x[((x_dim1) << (1)) + 1] = xi1; - x[((x_dim1) << (1)) + 2] = xi2; - } -/* Computing MAX */ - d__1 = abs(xr1) + abs(xi1), d__2 = abs(xr2) + abs(xi2); - *xnorm = max(d__1,d__2); - -/* Further scaling if norm(A) norm(X) > overflow */ - - if (*xnorm > 1. && cmax > 1.) { - if (*xnorm > bignum / cmax) { - temp = cmax / bignum; - x[x_dim1 + 1] = temp * x[x_dim1 + 1]; - x[x_dim1 + 2] = temp * x[x_dim1 + 2]; - x[((x_dim1) << (1)) + 1] = temp * x[((x_dim1) << (1)) + 1] - ; - x[((x_dim1) << (1)) + 2] = temp * x[((x_dim1) << (1)) + 2] - ; - *xnorm = temp * *xnorm; - *scale = temp * *scale; - } - } - } - } - - return 0; - -/* End of DLALN2 */ - -} /* dlaln2_ */ - -#undef crv -#undef civ -#undef cr -#undef ci - - -/* Subroutine */ int dlals0_(integer *icompq, integer *nl, integer *nr, - integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal - *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, - integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * - poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * - k, doublereal *c__, doublereal *s, doublereal *work, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, - difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, - poles_offset, i__1, i__2; - doublereal d__1; - - /* Local variables */ - static integer i__, j, m, n; - static doublereal dj; - static integer nlp1; - static doublereal temp; - extern /* Subroutine */ int 
drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - static doublereal diflj, difrj, dsigj; - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), dcopy_(integer *, - doublereal *, integer *, doublereal *, integer *); - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlacpy_(char *, integer *, integer - *, doublereal *, integer *, doublereal *, integer *), - xerbla_(char *, integer *); - static doublereal dsigjp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 - - - Purpose - ======= - - DLALS0 applies back the multiplying factors of either the left or the - right singular vector matrix of a diagonal matrix appended by a row - to the right hand side matrix B in solving the least squares problem - using the divide-and-conquer SVD approach. - - For the left singular vector matrix, three types of orthogonal - matrices are involved: - - (1L) Givens rotations: the number of such rotations is GIVPTR; the - pairs of columns/rows they were applied to are stored in GIVCOL; - and the C- and S-values of these rotations are stored in GIVNUM. - - (2L) Permutation. The (NL+1)-st row of B is to be moved to the first - row, and for J=2:N, PERM(J)-th row of B is to be moved to the - J-th row. - - (3L) The left singular vector matrix of the remaining matrix. 
- - For the right singular vector matrix, four types of orthogonal - matrices are involved: - - (1R) The right singular vector matrix of the remaining matrix. - - (2R) If SQRE = 1, one extra Givens rotation to generate the right - null space. - - (3R) The inverse transformation of (2L). - - (4R) The inverse transformation of (1L). - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed in - factored form: - = 0: Left singular vector matrix. - = 1: Right singular vector matrix. - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has row dimension N = NL + NR + 1, - and column dimension M = N + SQRE. - - NRHS (input) INTEGER - The number of columns of B and BX. NRHS must be at least 1. - - B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) - On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. - - LDB (input) INTEGER - The leading dimension of B. LDB must be at least - max(1,MAX( M, N ) ). - - BX (workspace) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) - - LDBX (input) INTEGER - The leading dimension of BX. - - PERM (input) INTEGER array, dimension ( N ) - The permutations (from deflation and sorting) applied - to the two blocks. - - GIVPTR (input) INTEGER - The number of Givens rotations which took place in this - subproblem. - - GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) - Each pair of numbers indicates a pair of rows/columns - involved in a Givens rotation. - - LDGCOL (input) INTEGER - The leading dimension of GIVCOL, must be at least N. 
- - GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - Each number indicates the C or S value used in the - corresponding Givens rotation. - - LDGNUM (input) INTEGER - The leading dimension of arrays DIFR, POLES and - GIVNUM, must be at least K. - - POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - On entry, POLES(1:K, 1) contains the new singular - values obtained from solving the secular equation, and - POLES(1:K, 2) is an array containing the poles in the secular - equation. - - DIFL (input) DOUBLE PRECISION array, dimension ( K ). - On entry, DIFL(I) is the distance between I-th updated - (undeflated) singular value and the I-th (undeflated) old - singular value. - - DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). - On entry, DIFR(I, 1) contains the distances between I-th - updated (undeflated) singular value and the I+1-th - (undeflated) old singular value. And DIFR(I, 2) is the - normalizing factor for the I-th right singular vector. - - Z (input) DOUBLE PRECISION array, dimension ( K ) - Contain the components of the deflation-adjusted updating row - vector. - - K (input) INTEGER - Contains the dimension of the non-deflated matrix, - This is the order of the related secular equation. 1 <= K <=N. - - C (input) DOUBLE PRECISION - C contains garbage if SQRE =0 and the C-value of a Givens - rotation related to the right null space if SQRE = 1. - - S (input) DOUBLE PRECISION - S contains garbage if SQRE =0 and the S-value of a Givens - rotation related to the right null space if SQRE = 1. - - WORK (workspace) DOUBLE PRECISION array, dimension ( K ) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. 
- - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - bx_dim1 = *ldbx; - bx_offset = 1 + bx_dim1; - bx -= bx_offset; - --perm; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - difr_dim1 = *ldgnum; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - poles_dim1 = *ldgnum; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - givnum_dim1 = *ldgnum; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - --difl; - --z__; - --work; - - /* Function Body */ - *info = 0; - - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*nl < 1) { - *info = -2; - } else if (*nr < 1) { - *info = -3; - } else if ((*sqre < 0) || (*sqre > 1)) { - *info = -4; - } - - n = *nl + *nr + 1; - - if (*nrhs < 1) { - *info = -5; - } else if (*ldb < n) { - *info = -7; - } else if (*ldbx < n) { - *info = -9; - } else if (*givptr < 0) { - *info = -11; - } else if (*ldgcol < n) { - *info = -13; - } else if (*ldgnum < n) { - *info = -15; - } else if (*k < 1) { - *info = -20; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLALS0", &i__1); - return 0; - } - - m = n + *sqre; - nlp1 = *nl + 1; - - if (*icompq == 0) { - -/* - Apply back orthogonal transformations from the left. - - Step (1L): apply back the Givens rotations performed. -*/ - - i__1 = *givptr; - for (i__ = 1; i__ <= i__1; ++i__) { - drot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1], - ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[ - i__ + ((givnum_dim1) << (1))], &givnum[i__ + givnum_dim1]) - ; -/* L10: */ - } - -/* Step (2L): permute rows of B. 
*/ - - dcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - dcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], - ldbx); -/* L20: */ - } - -/* - Step (3L): apply the inverse of the left singular vector - matrix to BX. -*/ - - if (*k == 1) { - dcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); - if (z__[1] < 0.) { - dscal_(nrhs, &c_b3001, &b[b_offset], ldb); - } - } else { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - diflj = difl[j]; - dj = poles[j + poles_dim1]; - dsigj = -poles[j + ((poles_dim1) << (1))]; - if (j < *k) { - difrj = -difr[j + difr_dim1]; - dsigjp = -poles[j + 1 + ((poles_dim1) << (1))]; - } - if ((z__[j] == 0.) || (poles[j + ((poles_dim1) << (1))] == 0.) - ) { - work[j] = 0.; - } else { - work[j] = -poles[j + ((poles_dim1) << (1))] * z__[j] / - diflj / (poles[j + ((poles_dim1) << (1))] + dj); - } - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - if ((z__[i__] == 0.) || (poles[i__ + ((poles_dim1) << (1)) - ] == 0.)) { - work[i__] = 0.; - } else { - work[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[ - i__] / (dlamc3_(&poles[i__ + ((poles_dim1) << - (1))], &dsigj) - diflj) / (poles[i__ + (( - poles_dim1) << (1))] + dj); - } -/* L30: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - if ((z__[i__] == 0.) || (poles[i__ + ((poles_dim1) << (1)) - ] == 0.)) { - work[i__] = 0.; - } else { - work[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[ - i__] / (dlamc3_(&poles[i__ + ((poles_dim1) << - (1))], &dsigjp) + difrj) / (poles[i__ + (( - poles_dim1) << (1))] + dj); - } -/* L40: */ - } - work[1] = -1.; - temp = dnrm2_(k, &work[1], &c__1); - dgemv_("T", k, nrhs, &c_b2865, &bx[bx_offset], ldbx, &work[1], - &c__1, &c_b2879, &b[j + b_dim1], ldb); - dlascl_("G", &c__0, &c__0, &temp, &c_b2865, &c__1, nrhs, &b[j - + b_dim1], ldb, info); -/* L50: */ - } - } - -/* Move the deflated rows of BX to B also. 
*/ - - if (*k < max(m,n)) { - i__1 = n - *k; - dlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 - + b_dim1], ldb); - } - } else { - -/* - Apply back the right orthogonal transformations. - - Step (1R): apply back the new right singular vector matrix - to B. -*/ - - if (*k == 1) { - dcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); - } else { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dsigj = poles[j + ((poles_dim1) << (1))]; - if (z__[j] == 0.) { - work[j] = 0.; - } else { - work[j] = -z__[j] / difl[j] / (dsigj + poles[j + - poles_dim1]) / difr[j + ((difr_dim1) << (1))]; - } - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - if (z__[j] == 0.) { - work[i__] = 0.; - } else { - d__1 = -poles[i__ + 1 + ((poles_dim1) << (1))]; - work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[ - i__ + difr_dim1]) / (dsigj + poles[i__ + - poles_dim1]) / difr[i__ + ((difr_dim1) << (1)) - ]; - } -/* L60: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - if (z__[j] == 0.) { - work[i__] = 0.; - } else { - d__1 = -poles[i__ + ((poles_dim1) << (1))]; - work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[ - i__]) / (dsigj + poles[i__ + poles_dim1]) / - difr[i__ + ((difr_dim1) << (1))]; - } -/* L70: */ - } - dgemv_("T", k, nrhs, &c_b2865, &b[b_offset], ldb, &work[1], & - c__1, &c_b2879, &bx[j + bx_dim1], ldbx); -/* L80: */ - } - } - -/* - Step (2R): if SQRE = 1, apply back the rotation that is - related to the right null space of the subproblem. -*/ - - if (*sqre == 1) { - dcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); - drot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, - s); - } - if (*k < max(m,n)) { - i__1 = n - *k; - dlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + - bx_dim1], ldbx); - } - -/* Step (3R): permute rows of B. 
*/ - - dcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); - if (*sqre == 1) { - dcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); - } - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - dcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], - ldb); -/* L90: */ - } - -/* Step (4R): apply back the Givens rotations performed. */ - - for (i__ = *givptr; i__ >= 1; --i__) { - d__1 = -givnum[i__ + givnum_dim1]; - drot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1], - ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[ - i__ + ((givnum_dim1) << (1))], &d__1); -/* L100: */ - } - } - - return 0; - -/* End of DLALS0 */ - -} /* dlals0_ */ - -/* Subroutine */ int dlalsa_(integer *icompq, integer *smlsiz, integer *n, - integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * - ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, - doublereal *difl, doublereal *difr, doublereal *z__, doublereal * - poles, integer *givptr, integer *givcol, integer *ldgcol, integer * - perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * - work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, - b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, - difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, - u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, - i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, - ndb1, nlp1, lvl2, nrp1, nlvl, sqre; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer inode, ndiml, ndimr; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), 
dlals0_(integer *, integer *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, doublereal *, doublereal *, doublereal *, - integer *), dlasdt_(integer *, integer *, integer *, integer *, - integer *, integer *, integer *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLALSA is an itermediate step in solving the least squares problem - by computing the SVD of the coefficient matrix in compact form (The - singular vectors are computed as products of simple orthorgonal - matrices.). - - If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector - matrix of an upper bidiagonal matrix to the right hand side; and if - ICOMPQ = 1, DLALSA applies the right singular vector matrix to the - right hand side. The singular vector matrices were generated in - compact form by DLALSA. - - Arguments - ========= - - - ICOMPQ (input) INTEGER - Specifies whether the left or the right singular vector - matrix is involved. - = 0: Left singular vector matrix - = 1: Right singular vector matrix - - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. - - N (input) INTEGER - The row and column dimensions of the upper bidiagonal matrix. - - NRHS (input) INTEGER - The number of columns of B and BX. NRHS must be at least 1. - - B (input) DOUBLE PRECISION array, dimension ( LDB, NRHS ) - On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. - - LDB (input) INTEGER - The leading dimension of B in the calling subprogram. - LDB must be at least max(1,MAX( M, N ) ). 
- - BX (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) - On exit, the result of applying the left or right singular - vector matrix to B. - - LDBX (input) INTEGER - The leading dimension of BX. - - U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). - On entry, U contains the left singular vector matrices of all - subproblems at the bottom level. - - LDU (input) INTEGER, LDU = > N. - The leading dimension of arrays U, VT, DIFL, DIFR, - POLES, GIVNUM, and Z. - - VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). - On entry, VT' contains the right singular vector matrices of - all subproblems at the bottom level. - - K (input) INTEGER array, dimension ( N ). - - DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). - where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. - - DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). - On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record - distances between singular values on the I-th level and - singular values on the (I -1)-th level, and DIFR(*, 2 * I) - record the normalizing factors of the right singular vectors - matrices of subproblems on I-th level. - - Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). - On entry, Z(1, I) contains the components of the deflation- - adjusted updating row vector for subproblems on the I-th - level. - - POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). - On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old - singular values involved in the secular equations on the I-th - level. - - GIVPTR (input) INTEGER array, dimension ( N ). - On entry, GIVPTR( I ) records the number of Givens - rotations performed on the I-th problem on the computation - tree. - - GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). - On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the - locations of Givens rotations performed on the I-th level on - the computation tree. - - LDGCOL (input) INTEGER, LDGCOL = > N. 
- The leading dimension of arrays GIVCOL and PERM. - - PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). - On entry, PERM(*, I) records permutations done on the I-th - level of the computation tree. - - GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). - On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- - values of Givens rotations performed on the I-th level on the - computation tree. - - C (input) DOUBLE PRECISION array, dimension ( N ). - On entry, if the I-th subproblem is not square, - C( I ) contains the C-value of a Givens rotation related to - the right null space of the I-th subproblem. - - S (input) DOUBLE PRECISION array, dimension ( N ). - On entry, if the I-th subproblem is not square, - S( I ) contains the S-value of a Givens rotation related to - the right null space of the I-th subproblem. - - WORK (workspace) DOUBLE PRECISION array. - The dimension must be at least N. - - IWORK (workspace) INTEGER array. - The dimension must be at least 3 * N - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - bx_dim1 = *ldbx; - bx_offset = 1 + bx_dim1; - bx -= bx_offset; - givnum_dim1 = *ldu; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - poles_dim1 = *ldu; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - z_dim1 = *ldu; - z_offset = 1 + z_dim1; - z__ -= z_offset; - difr_dim1 = *ldu; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - difl_dim1 = *ldu; - difl_offset = 1 + difl_dim1; - difl -= difl_offset; - vt_dim1 = *ldu; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - --k; - --givptr; - perm_dim1 = *ldgcol; - perm_offset = 1 + perm_dim1; - perm -= perm_offset; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - --c__; - --s; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*smlsiz < 3) { - *info = -2; - } else if (*n < *smlsiz) { - *info = -3; - } else if (*nrhs < 1) { - *info = -4; - } else if (*ldb < *n) { - *info = -6; - } else if (*ldbx < *n) { - *info = -8; - } else if (*ldu < *n) { - *info = -10; - } else if (*ldgcol < *n) { - *info = -19; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLALSA", &i__1); - return 0; - } - -/* Book-keeping and setting up the computation tree. */ - - inode = 1; - ndiml = inode + *n; - ndimr = ndiml + *n; - - dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], - smlsiz); - -/* - The following code applies back the left singular vector factors. - For applying back the right singular vector factors, go to 50. -*/ - - if (*icompq == 1) { - goto L50; - } - -/* - The nodes on the bottom level of the tree were solved - by DLASDQ. The corresponding left and right singular vector - matrices are in explicit form. First apply back the left - singular vector matrices. 
-*/ - - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - -/* - IC : center row of each node - NL : number of rows of left subproblem - NR : number of rows of right subproblem - NLF: starting row of the left subproblem - NRF: starting row of the right subproblem -*/ - - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nr = iwork[ndimr + i1]; - nlf = ic - nl; - nrf = ic + 1; - dgemm_("T", "N", &nl, nrhs, &nl, &c_b2865, &u[nlf + u_dim1], ldu, &b[ - nlf + b_dim1], ldb, &c_b2879, &bx[nlf + bx_dim1], ldbx); - dgemm_("T", "N", &nr, nrhs, &nr, &c_b2865, &u[nrf + u_dim1], ldu, &b[ - nrf + b_dim1], ldb, &c_b2879, &bx[nrf + bx_dim1], ldbx); -/* L10: */ - } - -/* - Next copy the rows of B that correspond to unchanged rows - in the bidiagonal matrix to BX. -*/ - - i__1 = nd; - for (i__ = 1; i__ <= i__1; ++i__) { - ic = iwork[inode + i__ - 1]; - dcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); -/* L20: */ - } - -/* - Finally go through the left singular vector matrices of all - the other subproblems bottom-up on the tree. 
-*/ - - j = pow_ii(&c__2, &nlvl); - sqre = 0; - - for (lvl = nlvl; lvl >= 1; --lvl) { - lvl2 = ((lvl) << (1)) - 1; - -/* - find the first node LF and last node LL on - the current level LVL -*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); - ll = ((lf) << (1)) - 1; - } - i__1 = ll; - for (i__ = lf; i__ <= i__1; ++i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - --j; - dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & - b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & - givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & - givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * - poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + - lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ - j], &s[j], &work[1], info); -/* L30: */ - } -/* L40: */ - } - goto L90; - -/* ICOMPQ = 1: applying back the right singular vector factors. */ - -L50: - -/* - First now go through the right singular vector matrices of all - the tree nodes top-down. -*/ - - j = 0; - i__1 = nlvl; - for (lvl = 1; lvl <= i__1; ++lvl) { - lvl2 = ((lvl) << (1)) - 1; - -/* - Find the first node LF and last node LL on - the current level LVL. 
-*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__2 = lvl - 1; - lf = pow_ii(&c__2, &i__2); - ll = ((lf) << (1)) - 1; - } - i__2 = lf; - for (i__ = ll; i__ >= i__2; --i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - if (i__ == ll) { - sqre = 0; - } else { - sqre = 1; - } - ++j; - dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ - nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & - givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & - givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * - poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + - lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ - j], &s[j], &work[1], info); -/* L60: */ - } -/* L70: */ - } - -/* - The nodes on the bottom level of the tree were solved - by DLASDQ. The corresponding right singular vector - matrices are in explicit form. Apply them back. -*/ - - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nr = iwork[ndimr + i1]; - nlp1 = nl + 1; - if (i__ == nd) { - nrp1 = nr; - } else { - nrp1 = nr + 1; - } - nlf = ic - nl; - nrf = ic + 1; - dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b2865, &vt[nlf + vt_dim1], - ldu, &b[nlf + b_dim1], ldb, &c_b2879, &bx[nlf + bx_dim1], - ldbx); - dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b2865, &vt[nrf + vt_dim1], - ldu, &b[nrf + b_dim1], ldb, &c_b2879, &bx[nrf + bx_dim1], - ldbx); -/* L80: */ - } - -L90: - - return 0; - -/* End of DLALSA */ - -} /* dlalsa_ */ - -/* Subroutine */ int dlalsd_(char *uplo, integer *smlsiz, integer *n, integer - *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, - doublereal *rcond, integer *rank, doublereal *work, integer *iwork, - integer *info) -{ - /* System generated locals */ - integer b_dim1, b_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double 
log(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer c__, i__, j, k; - static doublereal r__; - static integer s, u, z__; - static doublereal cs; - static integer bx; - static doublereal sn; - static integer st, vt, nm1, st1; - static doublereal eps; - static integer iwk; - static doublereal tol; - static integer difl, difr, perm, nsub; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer nlvl, sqre, bxst; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *), - dcopy_(integer *, doublereal *, integer *, doublereal *, integer - *); - static integer poles, sizei, nsize, nwork, icmpq1, icmpq2; - - extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *), dlalsa_(integer *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - integer *, integer *), dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *); - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer - *, integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *), 
dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *), dlaset_(char *, integer *, integer *, - doublereal *, doublereal *, doublereal *, integer *), - xerbla_(char *, integer *); - static integer givcol; - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); - static doublereal orgnrm; - static integer givnum, givptr, smlszp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLALSD uses the singular value decomposition of A to solve the least - squares problem of finding X to minimize the Euclidean norm of each - column of A*X-B, where A is N-by-N upper bidiagonal, and X and B - are N-by-NRHS. The solution X overwrites B. - - The singular values of A smaller than RCOND times the largest - singular value are treated as zero in solving the least squares - problem; in this case a minimum norm solution is returned. - The actual singular values are returned in D in ascending order. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': D and E define an upper bidiagonal matrix. - = 'L': D and E define a lower bidiagonal matrix. - - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. - - N (input) INTEGER - The dimension of the bidiagonal matrix. N >= 0. 
- - NRHS (input) INTEGER - The number of columns of B. NRHS must be at least 1. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry D contains the main diagonal of the bidiagonal - matrix. On exit, if INFO = 0, D contains its singular values. - - E (input) DOUBLE PRECISION array, dimension (N-1) - Contains the super-diagonal entries of the bidiagonal matrix. - On exit, E has been destroyed. - - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On input, B contains the right hand sides of the least - squares problem. On output, B contains the solution X. - - LDB (input) INTEGER - The leading dimension of B in the calling subprogram. - LDB must be at least max(1,N). - - RCOND (input) DOUBLE PRECISION - The singular values of A less than or equal to RCOND times - the largest singular value are treated as zero in solving - the least squares problem. If RCOND is negative, - machine precision is used instead. - For example, if diag(S)*X=B were the least squares problem, - where diag(S) is a diagonal matrix of singular values, the - solution would be X(i) = B(i) / S(i) if S(i) is greater than - RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to - RCOND*max(S). - - RANK (output) INTEGER - The number of singular values of A greater than RCOND times - the largest singular value. - - WORK (workspace) DOUBLE PRECISION array, dimension at least - (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), - where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). - - IWORK (workspace) INTEGER array, dimension at least - (3*N*NLVL + 11*N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value while - working on the submatrix lying in rows and columns - INFO/(N+1) through MOD(INFO,N+1). 
- - Further Details - =============== - - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -3; - } else if (*nrhs < 1) { - *info = -4; - } else if ((*ldb < 1) || (*ldb < *n)) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLALSD", &i__1); - return 0; - } - - eps = EPSILON; - -/* Set up the tolerance. */ - - if ((*rcond <= 0.) || (*rcond >= 1.)) { - *rcond = eps; - } - - *rank = 0; - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } else if (*n == 1) { - if (d__[1] == 0.) { - dlaset_("A", &c__1, nrhs, &c_b2879, &c_b2879, &b[b_offset], ldb); - } else { - *rank = 1; - dlascl_("G", &c__0, &c__0, &d__[1], &c_b2865, &c__1, nrhs, &b[ - b_offset], ldb, info); - d__[1] = abs(d__[1]); - } - return 0; - } - -/* Rotate the matrix if it is lower bidiagonal. */ - - if (*(unsigned char *)uplo == 'L') { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (*nrhs == 1) { - drot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & - c__1, &cs, &sn); - } else { - work[((i__) << (1)) - 1] = cs; - work[i__ * 2] = sn; - } -/* L10: */ - } - if (*nrhs > 1) { - i__1 = *nrhs; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *n - 1; - for (j = 1; j <= i__2; ++j) { - cs = work[((j) << (1)) - 1]; - sn = work[j * 2]; - drot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ * - b_dim1], &c__1, &cs, &sn); -/* L20: */ - } -/* L30: */ - } - } - } - -/* Scale. 
*/ - - nm1 = *n - 1; - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { - dlaset_("A", n, nrhs, &c_b2879, &c_b2879, &b[b_offset], ldb); - return 0; - } - - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, n, &c__1, &d__[1], n, info); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, &nm1, &c__1, &e[1], &nm1, - info); - -/* - If N is smaller than the minimum divide size SMLSIZ, then solve - the problem with another solver. -*/ - - if (*n <= *smlsiz) { - nwork = *n * *n + 1; - dlaset_("A", n, n, &c_b2879, &c_b2865, &work[1], n); - dlasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, & - work[1], n, &b[b_offset], ldb, &work[nwork], info); - if (*info != 0) { - return 0; - } - tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (d__[i__] <= tol) { - dlaset_("A", &c__1, nrhs, &c_b2879, &c_b2879, &b[i__ + b_dim1] - , ldb); - } else { - dlascl_("G", &c__0, &c__0, &d__[i__], &c_b2865, &c__1, nrhs, & - b[i__ + b_dim1], ldb, info); - ++(*rank); - } -/* L40: */ - } - dgemm_("T", "N", n, nrhs, n, &c_b2865, &work[1], n, &b[b_offset], ldb, - &c_b2879, &work[nwork], n); - dlacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb); - -/* Unscale. */ - - dlascl_("G", &c__0, &c__0, &c_b2865, &orgnrm, n, &c__1, &d__[1], n, - info); - dlasrt_("D", n, &d__[1], info); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, n, nrhs, &b[b_offset], - ldb, info); - - return 0; - } - -/* Book-keeping and setting up some constants. 
*/ - - nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) / - log(2.)) + 1; - - smlszp = *smlsiz + 1; - - u = 1; - vt = *smlsiz * *n + 1; - difl = vt + smlszp * *n; - difr = difl + nlvl * *n; - z__ = difr + ((nlvl * *n) << (1)); - c__ = z__ + nlvl * *n; - s = c__ + *n; - poles = s + *n; - givnum = poles + ((nlvl) << (1)) * *n; - bx = givnum + ((nlvl) << (1)) * *n; - nwork = bx + *n * *nrhs; - - sizei = *n + 1; - k = sizei + *n; - givptr = k + *n; - perm = givptr + *n; - givcol = perm + nlvl * *n; - iwk = givcol + ((nlvl * *n) << (1)); - - st = 1; - sqre = 0; - icmpq1 = 1; - icmpq2 = 0; - nsub = 0; - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = d__[i__], abs(d__1)) < eps) { - d__[i__] = d_sign(&eps, &d__[i__]); - } -/* L50: */ - } - - i__1 = nm1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (((d__1 = e[i__], abs(d__1)) < eps) || (i__ == nm1)) { - ++nsub; - iwork[nsub] = st; - -/* - Subproblem found. First determine its size and then - apply divide and conquer on it. -*/ - - if (i__ < nm1) { - -/* A subproblem with E(I) small for I < NM1. */ - - nsize = i__ - st + 1; - iwork[sizei + nsub - 1] = nsize; - } else if ((d__1 = e[i__], abs(d__1)) >= eps) { - -/* A subproblem with E(NM1) not too small but I = NM1. */ - - nsize = *n - st + 1; - iwork[sizei + nsub - 1] = nsize; - } else { - -/* - A subproblem with E(NM1) small. This implies an - 1-by-1 subproblem at D(N), which is not solved - explicitly. -*/ - - nsize = i__ - st + 1; - iwork[sizei + nsub - 1] = nsize; - ++nsub; - iwork[nsub] = *n; - iwork[sizei + nsub - 1] = 1; - dcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); - } - st1 = st - 1; - if (nsize == 1) { - -/* - This is a 1-by-1 subproblem and is not solved - explicitly. -*/ - - dcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); - } else if (nsize <= *smlsiz) { - -/* This is a small subproblem and is solved by DLASDQ. 
*/ - - dlaset_("A", &nsize, &nsize, &c_b2879, &c_b2865, &work[vt + - st1], n); - dlasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[ - st], &work[vt + st1], n, &work[nwork], n, &b[st + - b_dim1], ldb, &work[nwork], info); - if (*info != 0) { - return 0; - } - dlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + - st1], n); - } else { - -/* A large problem. Solve it using divide and conquer. */ - - dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & - work[u + st1], n, &work[vt + st1], &iwork[k + st1], & - work[difl + st1], &work[difr + st1], &work[z__ + st1], - &work[poles + st1], &iwork[givptr + st1], &iwork[ - givcol + st1], n, &iwork[perm + st1], &work[givnum + - st1], &work[c__ + st1], &work[s + st1], &work[nwork], - &iwork[iwk], info); - if (*info != 0) { - return 0; - } - bxst = bx + st1; - dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & - work[bxst], n, &work[u + st1], n, &work[vt + st1], & - iwork[k + st1], &work[difl + st1], &work[difr + st1], - &work[z__ + st1], &work[poles + st1], &iwork[givptr + - st1], &iwork[givcol + st1], n, &iwork[perm + st1], & - work[givnum + st1], &work[c__ + st1], &work[s + st1], - &work[nwork], &iwork[iwk], info); - if (*info != 0) { - return 0; - } - } - st = i__ + 1; - } -/* L60: */ - } - -/* Apply the singular values and treat the tiny ones as zero. */ - - tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* - Some of the elements in D can be negative because 1-by-1 - subproblems were not solved explicitly. -*/ - - if ((d__1 = d__[i__], abs(d__1)) <= tol) { - dlaset_("A", &c__1, nrhs, &c_b2879, &c_b2879, &work[bx + i__ - 1], - n); - } else { - ++(*rank); - dlascl_("G", &c__0, &c__0, &d__[i__], &c_b2865, &c__1, nrhs, & - work[bx + i__ - 1], n, info); - } - d__[i__] = (d__1 = d__[i__], abs(d__1)); -/* L70: */ - } - -/* Now apply back the right singular vectors. 
*/ - - icmpq2 = 1; - i__1 = nsub; - for (i__ = 1; i__ <= i__1; ++i__) { - st = iwork[i__]; - st1 = st - 1; - nsize = iwork[sizei + i__ - 1]; - bxst = bx + st1; - if (nsize == 1) { - dcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); - } else if (nsize <= *smlsiz) { - dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b2865, &work[vt + st1], - n, &work[bxst], n, &c_b2879, &b[st + b_dim1], ldb); - } else { - dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + - b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[ - k + st1], &work[difl + st1], &work[difr + st1], &work[z__ - + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ - givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], - &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[ - iwk], info); - if (*info != 0) { - return 0; - } - } -/* L80: */ - } - -/* Unscale and sort the singular values. */ - - dlascl_("G", &c__0, &c__0, &c_b2865, &orgnrm, n, &c__1, &d__[1], n, info); - dlasrt_("D", n, &d__[1], info); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, n, nrhs, &b[b_offset], ldb, - info); - - return 0; - -/* End of DLALSD */ - -} /* dlalsd_ */ - -/* Subroutine */ int dlamrg_(integer *n1, integer *n2, doublereal *a, integer - *dtrd1, integer *dtrd2, integer *index) -{ - /* System generated locals */ - integer i__1; - - /* Local variables */ - static integer i__, ind1, ind2, n1sv, n2sv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DLAMRG will create a permutation list which will merge the elements - of A (which is composed of two independently sorted sets) into a - single set which is sorted in ascending order. - - Arguments - ========= - - N1 (input) INTEGER - N2 (input) INTEGER - These arguements contain the respective lengths of the two - sorted lists to be merged. 
- - A (input) DOUBLE PRECISION array, dimension (N1+N2) - The first N1 elements of A contain a list of numbers which - are sorted in either ascending or descending order. Likewise - for the final N2 elements. - - DTRD1 (input) INTEGER - DTRD2 (input) INTEGER - These are the strides to be taken through the array A. - Allowable strides are 1 and -1. They indicate whether a - subset of A is sorted in ascending (DTRDx = 1) or descending - (DTRDx = -1) order. - - INDEX (output) INTEGER array, dimension (N1+N2) - On exit this array will contain a permutation such that - if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be - sorted in ascending order. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --index; - --a; - - /* Function Body */ - n1sv = *n1; - n2sv = *n2; - if (*dtrd1 > 0) { - ind1 = 1; - } else { - ind1 = *n1; - } - if (*dtrd2 > 0) { - ind2 = *n1 + 1; - } else { - ind2 = *n1 + *n2; - } - i__ = 1; -/* while ( (N1SV > 0) & (N2SV > 0) ) */ -L10: - if (n1sv > 0 && n2sv > 0) { - if (a[ind1] <= a[ind2]) { - index[i__] = ind1; - ++i__; - ind1 += *dtrd1; - --n1sv; - } else { - index[i__] = ind2; - ++i__; - ind2 += *dtrd2; - --n2sv; - } - goto L10; - } -/* end while */ - if (n1sv == 0) { - i__1 = n2sv; - for (n1sv = 1; n1sv <= i__1; ++n1sv) { - index[i__] = ind2; - ++i__; - ind2 += *dtrd2; -/* L20: */ - } - } else { -/* N2SV .EQ. 
0 */ - i__1 = n1sv; - for (n2sv = 1; n2sv <= i__1; ++n2sv) { - index[i__] = ind1; - ++i__; - ind1 += *dtrd1; -/* L30: */ - } - } - - return 0; - -/* End of DLAMRG */ - -} /* dlamrg_ */ - -doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer - *lda, doublereal *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal ret_val, d__1, d__2, d__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static doublereal sum, scale; - extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, - doublereal *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLANGE returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real matrix A. - - Description - =========== - - DLANGE returns the value - - DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANGE as described - above. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. When M = 0, - DLANGE is set to zero. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. When N = 0, - DLANGE is set to zero. 
- - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The m by n matrix A. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(M,1). - - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= M when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (min(*m,*n) == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); - value = max(d__2,d__3); -/* L10: */ - } -/* L20: */ - } - } else if ((lsame_(norm, "O")) || (*(unsigned char * - )norm == '1')) { - -/* Find norm1(A). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L30: */ - } - value = max(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). */ - - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L60: */ - } -/* L70: */ - } - value = 0.; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L80: */ - } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { - -/* Find normF(A). 
*/ - - scale = 0.; - sum = 1.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of DLANGE */ - -} /* dlange_ */ - -doublereal dlanhs_(char *norm, integer *n, doublereal *a, integer *lda, - doublereal *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublereal ret_val, d__1, d__2, d__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static doublereal sum, scale; - extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, - doublereal *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLANHS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - Hessenberg matrix A. - - Description - =========== - - DLANHS returns the value - - DLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANHS as described - above. - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, DLANHS is - set to zero. 
- - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The n by n upper Hessenberg matrix A; the part of A below the - first sub-diagonal is not referenced. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= N when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); - value = max(d__2,d__3); -/* L10: */ - } -/* L20: */ - } - } else if ((lsame_(norm, "O")) || (*(unsigned char * - )norm == '1')) { - -/* Find norm1(A). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L30: */ - } - value = max(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). 
*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L60: */ - } -/* L70: */ - } - value = 0.; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L80: */ - } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { - -/* Find normF(A). */ - - scale = 0.; - sum = 1.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of DLANHS */ - -} /* dlanhs_ */ - -doublereal dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e) -{ - /* System generated locals */ - integer i__1; - doublereal ret_val, d__1, d__2, d__3, d__4, d__5; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__; - static doublereal sum, scale; - extern logical lsame_(char *, char *); - static doublereal anorm; - extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, - doublereal *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLANST returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real symmetric tridiagonal matrix A. 
- - Description - =========== - - DLANST returns the value - - DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANST as described - above. - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, DLANST is - set to zero. - - D (input) DOUBLE PRECISION array, dimension (N) - The diagonal elements of A. - - E (input) DOUBLE PRECISION array, dimension (N-1) - The (n-1) sub-diagonal or super-diagonal elements of A. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --e; - --d__; - - /* Function Body */ - if (*n <= 0) { - anorm = 0.; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - anorm = (d__1 = d__[*n], abs(d__1)); - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__2 = anorm, d__3 = (d__1 = d__[i__], abs(d__1)); - anorm = max(d__2,d__3); -/* Computing MAX */ - d__2 = anorm, d__3 = (d__1 = e[i__], abs(d__1)); - anorm = max(d__2,d__3); -/* L10: */ - } - } else if (((lsame_(norm, "O")) || (*(unsigned char - *)norm == '1')) || (lsame_(norm, "I"))) { - -/* Find norm1(A). 
*/ - - if (*n == 1) { - anorm = abs(d__[1]); - } else { -/* Computing MAX */ - d__3 = abs(d__[1]) + abs(e[1]), d__4 = (d__1 = e[*n - 1], abs( - d__1)) + (d__2 = d__[*n], abs(d__2)); - anorm = max(d__3,d__4); - i__1 = *n - 1; - for (i__ = 2; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[ - i__], abs(d__2)) + (d__3 = e[i__ - 1], abs(d__3)); - anorm = max(d__4,d__5); -/* L20: */ - } - } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { - -/* Find normF(A). */ - - scale = 0.; - sum = 1.; - if (*n > 1) { - i__1 = *n - 1; - dlassq_(&i__1, &e[1], &c__1, &scale, &sum); - sum *= 2; - } - dlassq_(n, &d__[1], &c__1, &scale, &sum); - anorm = scale * sqrt(sum); - } - - ret_val = anorm; - return ret_val; - -/* End of DLANST */ - -} /* dlanst_ */ - -doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer - *lda, doublereal *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal ret_val, d__1, d__2, d__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static doublereal sum, absa, scale; - extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, - doublereal *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLANSY returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real symmetric matrix A. 
- - Description - =========== - - DLANSY returns the value - - DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANSY as described - above. - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - symmetric matrix A is to be referenced. - = 'U': Upper triangular part of A is referenced - = 'L': Lower triangular part of A is referenced - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, DLANSY is - set to zero. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The symmetric matrix A. If UPLO = 'U', the leading n by n - upper triangular part of A contains the upper triangular part - of the matrix A, and the strictly lower triangular part of A - is not referenced. If UPLO = 'L', the leading n by n lower - triangular part of A contains the lower triangular part of - the matrix A, and the strictly upper triangular part of A is - not referenced. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, - WORK is not referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). 
*/ - - value = 0.; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( - d__1)); - value = max(d__2,d__3); -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( - d__1)); - value = max(d__2,d__3); -/* L30: */ - } -/* L40: */ - } - } - } else if (((lsame_(norm, "I")) || (lsame_(norm, - "O"))) || (*(unsigned char *)norm == '1')) { - -/* Find normI(A) ( = norm1(A), since A is symmetric). */ - - value = 0.; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); - sum += absa; - work[i__] += absa; -/* L50: */ - } - work[j] = sum + (d__1 = a[j + j * a_dim1], abs(d__1)); -/* L60: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L70: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L80: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = work[j] + (d__1 = a[j + j * a_dim1], abs(d__1)); - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); - sum += absa; - work[i__] += absa; -/* L90: */ - } - value = max(value,sum); -/* L100: */ - } - } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { - -/* Find normF(A). 
*/ - - scale = 0.; - sum = 1.; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - i__2 = j - 1; - dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L110: */ - } - } else { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - i__2 = *n - j; - dlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); -/* L120: */ - } - } - sum *= 2; - i__1 = *lda + 1; - dlassq_(n, &a[a_offset], &i__1, &scale, &sum); - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of DLANSY */ - -} /* dlansy_ */ - -/* Subroutine */ int dlanv2_(doublereal *a, doublereal *b, doublereal *c__, - doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r, - doublereal *rt2i, doublereal *cs, doublereal *sn) -{ - /* System generated locals */ - doublereal d__1, d__2; - - /* Builtin functions */ - double d_sign(doublereal *, doublereal *), sqrt(doublereal); - - /* Local variables */ - static doublereal p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, - temp, scale, bcmax, bcmis, sigma; - - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric - matrix in standard form: - - [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] - [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] - - where either - 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or - 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex - conjugate eigenvalues. - - Arguments - ========= - - A (input/output) DOUBLE PRECISION - B (input/output) DOUBLE PRECISION - C (input/output) DOUBLE PRECISION - D (input/output) DOUBLE PRECISION - On entry, the elements of the input matrix. - On exit, they are overwritten by the elements of the - standardised Schur form. 
- - RT1R (output) DOUBLE PRECISION - RT1I (output) DOUBLE PRECISION - RT2R (output) DOUBLE PRECISION - RT2I (output) DOUBLE PRECISION - The real and imaginary parts of the eigenvalues. If the - eigenvalues are a complex conjugate pair, RT1I > 0. - - CS (output) DOUBLE PRECISION - SN (output) DOUBLE PRECISION - Parameters of the rotation matrix. - - Further Details - =============== - - Modified by V. Sima, Research Institute for Informatics, Bucharest, - Romania, to reduce the risk of cancellation errors, - when computing real eigenvalues, and to ensure, if possible, that - abs(RT1R) >= abs(RT2R). - - ===================================================================== -*/ - - - eps = PRECISION; - if (*c__ == 0.) { - *cs = 1.; - *sn = 0.; - goto L10; - - } else if (*b == 0.) { - -/* Swap rows and columns */ - - *cs = 0.; - *sn = 1.; - temp = *d__; - *d__ = *a; - *a = temp; - *b = -(*c__); - *c__ = 0.; - goto L10; - } else if (*a - *d__ == 0. && d_sign(&c_b2865, b) != d_sign(&c_b2865, c__) - ) { - *cs = 1.; - *sn = 0.; - goto L10; - } else { - - temp = *a - *d__; - p = temp * .5; -/* Computing MAX */ - d__1 = abs(*b), d__2 = abs(*c__); - bcmax = max(d__1,d__2); -/* Computing MIN */ - d__1 = abs(*b), d__2 = abs(*c__); - bcmis = min(d__1,d__2) * d_sign(&c_b2865, b) * d_sign(&c_b2865, c__); -/* Computing MAX */ - d__1 = abs(p); - scale = max(d__1,bcmax); - z__ = p / scale * p + bcmax / scale * bcmis; - -/* - If Z is of the order of the machine accuracy, postpone the - decision on the nature of eigenvalues -*/ - - if (z__ >= eps * 4.) { - -/* Real eigenvalues. Compute A and D. */ - - d__1 = sqrt(scale) * sqrt(z__); - z__ = p + d_sign(&d__1, &p); - *a = *d__ + z__; - *d__ -= bcmax / z__ * bcmis; - -/* Compute B and the rotation matrix */ - - tau = dlapy2_(c__, &z__); - *cs = z__ / tau; - *sn = *c__ / tau; - *b -= *c__; - *c__ = 0.; - } else { - -/* - Complex eigenvalues, or real (almost) equal eigenvalues. - Make diagonal elements equal. 
-*/ - - sigma = *b + *c__; - tau = dlapy2_(&sigma, &temp); - *cs = sqrt((abs(sigma) / tau + 1.) * .5); - *sn = -(p / (tau * *cs)) * d_sign(&c_b2865, &sigma); - -/* - Compute [ AA BB ] = [ A B ] [ CS -SN ] - [ CC DD ] [ C D ] [ SN CS ] -*/ - - aa = *a * *cs + *b * *sn; - bb = -(*a) * *sn + *b * *cs; - cc = *c__ * *cs + *d__ * *sn; - dd = -(*c__) * *sn + *d__ * *cs; - -/* - Compute [ A B ] = [ CS SN ] [ AA BB ] - [ C D ] [-SN CS ] [ CC DD ] -*/ - - *a = aa * *cs + cc * *sn; - *b = bb * *cs + dd * *sn; - *c__ = -aa * *sn + cc * *cs; - *d__ = -bb * *sn + dd * *cs; - - temp = (*a + *d__) * .5; - *a = temp; - *d__ = temp; - - if (*c__ != 0.) { - if (*b != 0.) { - if (d_sign(&c_b2865, b) == d_sign(&c_b2865, c__)) { - -/* Real eigenvalues: reduce to upper triangular form */ - - sab = sqrt((abs(*b))); - sac = sqrt((abs(*c__))); - d__1 = sab * sac; - p = d_sign(&d__1, c__); - tau = 1. / sqrt((d__1 = *b + *c__, abs(d__1))); - *a = temp + p; - *d__ = temp - p; - *b -= *c__; - *c__ = 0.; - cs1 = sab * tau; - sn1 = sac * tau; - temp = *cs * cs1 - *sn * sn1; - *sn = *cs * sn1 + *sn * cs1; - *cs = temp; - } - } else { - *b = -(*c__); - *c__ = 0.; - temp = *cs; - *cs = -(*sn); - *sn = temp; - } - } - } - - } - -L10: - -/* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */ - - *rt1r = *a; - *rt2r = *d__; - if (*c__ == 0.) { - *rt1i = 0.; - *rt2i = 0.; - } else { - *rt1i = sqrt((abs(*b))) * sqrt((abs(*c__))); - *rt2i = -(*rt1i); - } - return 0; - -/* End of DLANV2 */ - -} /* dlanv2_ */ - -doublereal dlapy2_(doublereal *x, doublereal *y) -{ - /* System generated locals */ - doublereal ret_val, d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal w, z__, xabs, yabs; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary - overflow. - - Arguments - ========= - - X (input) DOUBLE PRECISION - Y (input) DOUBLE PRECISION - X and Y specify the values x and y. - - ===================================================================== -*/ - - - xabs = abs(*x); - yabs = abs(*y); - w = max(xabs,yabs); - z__ = min(xabs,yabs); - if (z__ == 0.) { - ret_val = w; - } else { -/* Computing 2nd power */ - d__1 = z__ / w; - ret_val = w * sqrt(d__1 * d__1 + 1.); - } - return ret_val; - -/* End of DLAPY2 */ - -} /* dlapy2_ */ - -doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) -{ - /* System generated locals */ - doublereal ret_val, d__1, d__2, d__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal w, xabs, yabs, zabs; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause - unnecessary overflow. - - Arguments - ========= - - X (input) DOUBLE PRECISION - Y (input) DOUBLE PRECISION - Z (input) DOUBLE PRECISION - X, Y and Z specify the values x, y and z. - - ===================================================================== -*/ - - - xabs = abs(*x); - yabs = abs(*y); - zabs = abs(*z__); -/* Computing MAX */ - d__1 = max(xabs,yabs); - w = max(d__1,zabs); - if (w == 0.) 
{ - ret_val = 0.; - } else { -/* Computing 2nd power */ - d__1 = xabs / w; -/* Computing 2nd power */ - d__2 = yabs / w; -/* Computing 2nd power */ - d__3 = zabs / w; - ret_val = w * sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3); - } - return ret_val; - -/* End of DLAPY3 */ - -} /* dlapy3_ */ - -/* Subroutine */ int dlarf_(char *side, integer *m, integer *n, doublereal *v, - integer *incv, doublereal *tau, doublereal *c__, integer *ldc, - doublereal *work) -{ - /* System generated locals */ - integer c_dim1, c_offset; - doublereal d__1; - - /* Local variables */ - extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLARF applies a real elementary reflector H to a real m by n matrix - C, from either the left or the right. H is represented in the form - - H = I - tau * v * v' - - where tau is a real scalar and v is a real vector. - - If tau = 0, then H is taken to be the unit matrix. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - V (input) DOUBLE PRECISION array, dimension - (1 + (M-1)*abs(INCV)) if SIDE = 'L' - or (1 + (N-1)*abs(INCV)) if SIDE = 'R' - The vector v in the representation of H. V is not used if - TAU = 0. - - INCV (input) INTEGER - The increment between elements of v. INCV <> 0. 
- - TAU (input) DOUBLE PRECISION - The value tau in the representation of H. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) DOUBLE PRECISION array, dimension - (N) if SIDE = 'L' - or (M) if SIDE = 'R' - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (lsame_(side, "L")) { - -/* Form H * C */ - - if (*tau != 0.) { - -/* w := C' * v */ - - dgemv_("Transpose", m, n, &c_b2865, &c__[c_offset], ldc, &v[1], - incv, &c_b2879, &work[1], &c__1); - -/* C := C - v * w' */ - - d__1 = -(*tau); - dger_(m, n, &d__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], - ldc); - } - } else { - -/* Form C * H */ - - if (*tau != 0.) 
{ - -/* w := C * v */ - - dgemv_("No transpose", m, n, &c_b2865, &c__[c_offset], ldc, &v[1], - incv, &c_b2879, &work[1], &c__1); - -/* C := C - w * v' */ - - d__1 = -(*tau); - dger_(m, n, &d__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], - ldc); - } - } - return 0; - -/* End of DLARF */ - -} /* dlarf_ */ - -/* Subroutine */ int dlarfb_(char *side, char *trans, char *direct, char * - storev, integer *m, integer *n, integer *k, doublereal *v, integer * - ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, - doublereal *work, integer *ldwork) -{ - /* System generated locals */ - integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, - work_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dtrmm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *); - static char transt[1]; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLARFB applies a real block reflector H or its transpose H' to a - real m by n matrix C, from either the left or the right. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply H or H' from the Left - = 'R': apply H or H' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply H (No transpose) - = 'T': apply H' (Transpose) - - DIRECT (input) CHARACTER*1 - Indicates how H is formed from a product of elementary - reflectors - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . 
H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Indicates how the vectors which define the elementary - reflectors are stored: - = 'C': Columnwise - = 'R': Rowwise - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - K (input) INTEGER - The order of the matrix T (= the number of elementary - reflectors whose product defines the block reflector). - - V (input) DOUBLE PRECISION array, dimension - (LDV,K) if STOREV = 'C' - (LDV,M) if STOREV = 'R' and SIDE = 'L' - (LDV,N) if STOREV = 'R' and SIDE = 'R' - The matrix V. See further details. - - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); - if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); - if STOREV = 'R', LDV >= K. - - T (input) DOUBLE PRECISION array, dimension (LDT,K) - The triangular k by k matrix T in the representation of the - block reflector. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by H*C or H'*C or C*H or C*H'. - - LDC (input) INTEGER - The leading dimension of the array C. LDA >= max(1,M). - - WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) - - LDWORK (input) INTEGER - The leading dimension of the array WORK. - If SIDE = 'L', LDWORK >= max(1,N); - if SIDE = 'R', LDWORK >= max(1,M). 
- - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - work_dim1 = *ldwork; - work_offset = 1 + work_dim1; - work -= work_offset; - - /* Function Body */ - if ((*m <= 0) || (*n <= 0)) { - return 0; - } - - if (lsame_(trans, "N")) { - *(unsigned char *)transt = 'T'; - } else { - *(unsigned char *)transt = 'N'; - } - - if (lsame_(storev, "C")) { - - if (lsame_(direct, "F")) { - -/* - Let V = ( V1 ) (first K rows) - ( V2 ) - where V1 is unit lower triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) - - W := C1' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); -/* L10: */ - } - -/* W := W * V1 */ - - dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, & - c_b2865, &v[v_offset], ldv, &work[work_offset], - ldwork); - if (*m > *k) { - -/* W := W + C2'*V2 */ - - i__1 = *m - *k; - dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b2865, - &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], - ldv, &c_b2865, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b2865, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V * W' */ - - if (*m > *k) { - -/* C2 := C2 - V2 * W' */ - - i__1 = *m - *k; - dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b3001, - &v[*k + 1 + v_dim1], ldv, &work[work_offset], - ldwork, &c_b2865, &c__[*k + 1 + c_dim1], ldc); - } - -/* W := W * V1' */ - - dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b2865, - &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - 
i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; -/* L20: */ - } -/* L30: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V = (C1*V1 + C2*V2) (stored in WORK) - - W := C1 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - work_dim1 + 1], &c__1); -/* L40: */ - } - -/* W := W * V1 */ - - dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, & - c_b2865, &v[v_offset], ldv, &work[work_offset], - ldwork); - if (*n > *k) { - -/* W := W + C2 * V2 */ - - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b2865, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k - + 1 + v_dim1], ldv, &c_b2865, &work[work_offset], - ldwork); - } - -/* W := W * T or W * T' */ - - dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b2865, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V' */ - - if (*n > *k) { - -/* C2 := C2 - W * V2' */ - - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b3001, - &work[work_offset], ldwork, &v[*k + 1 + v_dim1], - ldv, &c_b2865, &c__[(*k + 1) * c_dim1 + 1], ldc); - } - -/* W := W * V1' */ - - dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b2865, - &v[v_offset], ldv, &work[work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; -/* L50: */ - } -/* L60: */ - } - } - - } else { - -/* - Let V = ( V1 ) - ( V2 ) (last K rows) - where V2 is unit upper triangular. 
-*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) - - W := C2' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); -/* L70: */ - } - -/* W := W * V2 */ - - dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, & - c_b2865, &v[*m - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); - if (*m > *k) { - -/* W := W + C1'*V1 */ - - i__1 = *m - *k; - dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b2865, - &c__[c_offset], ldc, &v[v_offset], ldv, &c_b2865, - &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b2865, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V * W' */ - - if (*m > *k) { - -/* C1 := C1 - V1 * W' */ - - i__1 = *m - *k; - dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b3001, - &v[v_offset], ldv, &work[work_offset], ldwork, & - c_b2865, &c__[c_offset], ldc); - } - -/* W := W * V2' */ - - dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b2865, - &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - -/* C2 := C2 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * - work_dim1]; -/* L80: */ - } -/* L90: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V = (C1*V1 + C2*V2) (stored in WORK) - - W := C2 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); -/* L100: */ - } - -/* W := W * V2 */ - - dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, & - c_b2865, &v[*n - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); - if (*n > *k) { - -/* W := W + C1 * V1 */ - - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, k, 
&i__1, & - c_b2865, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b2865, &work[work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b2865, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V' */ - - if (*n > *k) { - -/* C1 := C1 - W * V1' */ - - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b3001, - &work[work_offset], ldwork, &v[v_offset], ldv, & - c_b2865, &c__[c_offset], ldc); - } - -/* W := W * V2' */ - - dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b2865, - &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - -/* C2 := C2 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * - work_dim1]; -/* L110: */ - } -/* L120: */ - } - } - } - - } else if (lsame_(storev, "R")) { - - if (lsame_(direct, "F")) { - -/* - Let V = ( V1 V2 ) (V1: first K columns) - where V1 is unit upper triangular. 
-*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - - W := C1' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); -/* L130: */ - } - -/* W := W * V1' */ - - dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b2865, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { - -/* W := W + C2'*V2' */ - - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b2865, & - c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + - 1], ldv, &c_b2865, &work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b2865, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V' * W' */ - - if (*m > *k) { - -/* C2 := C2 - V2' * W' */ - - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b3001, & - v[(*k + 1) * v_dim1 + 1], ldv, &work[work_offset], - ldwork, &c_b2865, &c__[*k + 1 + c_dim1], ldc); - } - -/* W := W * V1 */ - - dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, & - c_b2865, &v[v_offset], ldv, &work[work_offset], - ldwork); - -/* C1 := C1 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; -/* L140: */ - } -/* L150: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - - W := C1 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - work_dim1 + 1], &c__1); -/* L160: */ - } - -/* W := W * V1' */ - - dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b2865, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { - -/* W := W + C2 * V2' */ - - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b2865, - &c__[(*k + 1) * 
c_dim1 + 1], ldc, &v[(*k + 1) * - v_dim1 + 1], ldv, &c_b2865, &work[work_offset], - ldwork); - } - -/* W := W * T or W * T' */ - - dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b2865, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V */ - - if (*n > *k) { - -/* C2 := C2 - W * V2 */ - - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, &i__1, k, & - c_b3001, &work[work_offset], ldwork, &v[(*k + 1) * - v_dim1 + 1], ldv, &c_b2865, &c__[(*k + 1) * - c_dim1 + 1], ldc); - } - -/* W := W * V1 */ - - dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, & - c_b2865, &v[v_offset], ldv, &work[work_offset], - ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; -/* L170: */ - } -/* L180: */ - } - - } - - } else { - -/* - Let V = ( V1 V2 ) (V2: last K columns) - where V2 is unit lower triangular. -*/ - - if (lsame_(side, "L")) { - -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - - W := C2' -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); -/* L190: */ - } - -/* W := W * V2' */ - - dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b2865, - &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*m > *k) { - -/* W := W + C1'*V1' */ - - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b2865, & - c__[c_offset], ldc, &v[v_offset], ldv, &c_b2865, & - work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b2865, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V' * W' */ - - if (*m > *k) { - -/* C1 := C1 - V1' * W' */ - - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b3001, & - v[v_offset], ldv, &work[work_offset], ldwork, & - 
c_b2865, &c__[c_offset], ldc); - } - -/* W := W * V2 */ - - dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, & - c_b2865, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - -/* C2 := C2 - W' */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * - work_dim1]; -/* L200: */ - } -/* L210: */ - } - - } else if (lsame_(side, "R")) { - -/* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - - W := C2 -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); -/* L220: */ - } - -/* W := W * V2' */ - - dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b2865, - &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*n > *k) { - -/* W := W + C1 * V1' */ - - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b2865, - &c__[c_offset], ldc, &v[v_offset], ldv, &c_b2865, - &work[work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b2865, & - t[t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V */ - - if (*n > *k) { - -/* C1 := C1 - W * V1 */ - - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, &i__1, k, & - c_b3001, &work[work_offset], ldwork, &v[v_offset], - ldv, &c_b2865, &c__[c_offset], ldc); - } - -/* W := W * V2 */ - - dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, & - c_b2865, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - -/* C1 := C1 - W */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * - work_dim1]; -/* L230: */ - } -/* L240: */ - } - - } - - } - } - - return 0; - -/* End of DLARFB */ - -} /* dlarfb_ */ - -/* Subroutine */ int dlarfg_(integer *n, doublereal 
*alpha, doublereal *x, - integer *incx, doublereal *tau) -{ - /* System generated locals */ - integer i__1; - doublereal d__1; - - /* Builtin functions */ - double d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer j, knt; - static doublereal beta; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - static doublereal xnorm; - - static doublereal safmin, rsafmn; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DLARFG generates a real elementary reflector H of order n, such - that - - H * ( alpha ) = ( beta ), H' * H = I. - ( x ) ( 0 ) - - where alpha and beta are scalars, and x is an (n-1)-element real - vector. H is represented in the form - - H = I - tau * ( 1 ) * ( 1 v' ) , - ( v ) - - where tau is a real scalar and v is a real (n-1)-element - vector. - - If the elements of x are all zero, then tau = 0 and H is taken to be - the unit matrix. - - Otherwise 1 <= tau <= 2. - - Arguments - ========= - - N (input) INTEGER - The order of the elementary reflector. - - ALPHA (input/output) DOUBLE PRECISION - On entry, the value alpha. - On exit, it is overwritten with the value beta. - - X (input/output) DOUBLE PRECISION array, dimension - (1+(N-2)*abs(INCX)) - On entry, the vector x. - On exit, it is overwritten with the vector v. - - INCX (input) INTEGER - The increment between elements of X. INCX > 0. - - TAU (output) DOUBLE PRECISION - The value tau. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n <= 1) { - *tau = 0.; - return 0; - } - - i__1 = *n - 1; - xnorm = dnrm2_(&i__1, &x[1], incx); - - if (xnorm == 0.) 
{ - -/* H = I */ - - *tau = 0.; - } else { - -/* general case */ - - d__1 = dlapy2_(alpha, &xnorm); - beta = -d_sign(&d__1, alpha); - safmin = SAFEMINIMUM / EPSILON; - if (abs(beta) < safmin) { - -/* XNORM, BETA may be inaccurate; scale X and recompute them */ - - rsafmn = 1. / safmin; - knt = 0; -L10: - ++knt; - i__1 = *n - 1; - dscal_(&i__1, &rsafmn, &x[1], incx); - beta *= rsafmn; - *alpha *= rsafmn; - if (abs(beta) < safmin) { - goto L10; - } - -/* New BETA is at most 1, at least SAFMIN */ - - i__1 = *n - 1; - xnorm = dnrm2_(&i__1, &x[1], incx); - d__1 = dlapy2_(alpha, &xnorm); - beta = -d_sign(&d__1, alpha); - *tau = (beta - *alpha) / beta; - i__1 = *n - 1; - d__1 = 1. / (*alpha - beta); - dscal_(&i__1, &d__1, &x[1], incx); - -/* If ALPHA is subnormal, it may lose relative accuracy */ - - *alpha = beta; - i__1 = knt; - for (j = 1; j <= i__1; ++j) { - *alpha *= safmin; -/* L20: */ - } - } else { - *tau = (beta - *alpha) / beta; - i__1 = *n - 1; - d__1 = 1. / (*alpha - beta); - dscal_(&i__1, &d__1, &x[1], incx); - *alpha = beta; - } - } - - return 0; - -/* End of DLARFG */ - -} /* dlarfg_ */ - -/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer * - k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, - integer *ldt) -{ - /* System generated locals */ - integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; - doublereal d__1; - - /* Local variables */ - static integer i__, j; - static doublereal vii; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), dtrmv_(char *, - char *, char *, integer *, doublereal *, integer *, doublereal *, - integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLARFT forms the triangular factor T of a real block reflector H - of order n, which is defined as a product of k elementary reflectors. - - If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; - - If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. - - If STOREV = 'C', the vector which defines the elementary reflector - H(i) is stored in the i-th column of the array V, and - - H = I - V * T * V' - - If STOREV = 'R', the vector which defines the elementary reflector - H(i) is stored in the i-th row of the array V, and - - H = I - V' * T * V - - Arguments - ========= - - DIRECT (input) CHARACTER*1 - Specifies the order in which the elementary reflectors are - multiplied to form the block reflector: - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Specifies how the vectors which define the elementary - reflectors are stored (see also Further Details): - = 'C': columnwise - = 'R': rowwise - - N (input) INTEGER - The order of the block reflector H. N >= 0. - - K (input) INTEGER - The order of the triangular factor T (= the number of - elementary reflectors). K >= 1. - - V (input/output) DOUBLE PRECISION array, dimension - (LDV,K) if STOREV = 'C' - (LDV,N) if STOREV = 'R' - The matrix V. See further details. - - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i). - - T (output) DOUBLE PRECISION array, dimension (LDT,K) - The k by k triangular factor T of the block reflector. - If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is - lower triangular. The rest of the array is not used. 
- - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - Further Details - =============== - - The shape of the matrix V and the storage of the vectors which define - the H(i) is best illustrated by the following example with n = 5 and - k = 3. The elements equal to 1 are not stored; the corresponding - array elements are modified but restored on exit. The rest of the - array is not used. - - DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': - - V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) - ( v1 1 ) ( 1 v2 v2 v2 ) - ( v1 v2 1 ) ( 1 v3 v3 ) - ( v1 v2 v3 ) - ( v1 v2 v3 ) - - DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': - - V = ( v1 v2 v3 ) V = ( v1 v1 1 ) - ( v1 v2 v3 ) ( v2 v2 v2 1 ) - ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) - ( 1 v3 ) - ( 1 ) - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - --tau; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - - /* Function Body */ - if (*n == 0) { - return 0; - } - - if (lsame_(direct, "F")) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - if (tau[i__] == 0.) 
{ - -/* H(i) = I */ - - i__2 = i__; - for (j = 1; j <= i__2; ++j) { - t[j + i__ * t_dim1] = 0.; -/* L10: */ - } - } else { - -/* general case */ - - vii = v[i__ + i__ * v_dim1]; - v[i__ + i__ * v_dim1] = 1.; - if (lsame_(storev, "C")) { - -/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ - - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - d__1 = -tau[i__]; - dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1], - ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b2879, &t[ - i__ * t_dim1 + 1], &c__1); - } else { - -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ - - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - d__1 = -tau[i__]; - dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ * - v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & - c_b2879, &t[i__ * t_dim1 + 1], &c__1); - } - v[i__ + i__ * v_dim1] = vii; - -/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ - - i__2 = i__ - 1; - dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); - t[i__ + i__ * t_dim1] = tau[i__]; - } -/* L20: */ - } - } else { - for (i__ = *k; i__ >= 1; --i__) { - if (tau[i__] == 0.) 
{ - -/* H(i) = I */ - - i__1 = *k; - for (j = i__; j <= i__1; ++j) { - t[j + i__ * t_dim1] = 0.; -/* L30: */ - } - } else { - -/* general case */ - - if (i__ < *k) { - if (lsame_(storev, "C")) { - vii = v[*n - *k + i__ + i__ * v_dim1]; - v[*n - *k + i__ + i__ * v_dim1] = 1.; - -/* - T(i+1:k,i) := - - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) -*/ - - i__1 = *n - *k + i__; - i__2 = *k - i__; - d__1 = -tau[i__]; - dgemv_("Transpose", &i__1, &i__2, &d__1, &v[(i__ + 1) - * v_dim1 + 1], ldv, &v[i__ * v_dim1 + 1], & - c__1, &c_b2879, &t[i__ + 1 + i__ * t_dim1], & - c__1); - v[*n - *k + i__ + i__ * v_dim1] = vii; - } else { - vii = v[i__ + (*n - *k + i__) * v_dim1]; - v[i__ + (*n - *k + i__) * v_dim1] = 1.; - -/* - T(i+1:k,i) := - - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' -*/ - - i__1 = *k - i__; - i__2 = *n - *k + i__; - d__1 = -tau[i__]; - dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + - 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & - c_b2879, &t[i__ + 1 + i__ * t_dim1], &c__1); - v[i__ + (*n - *k + i__) * v_dim1] = vii; - } - -/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ - - i__1 = *k - i__; - dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ - + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * - t_dim1], &c__1) - ; - } - t[i__ + i__ * t_dim1] = tau[i__]; - } -/* L40: */ - } - } - return 0; - -/* End of DLARFT */ - -} /* dlarft_ */ - -/* Subroutine */ int dlarfx_(char *side, integer *m, integer *n, doublereal * - v, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work) -{ - /* System generated locals */ - integer c_dim1, c_offset, i__1; - doublereal d__1; - - /* Local variables */ - static integer j; - static doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, - v6, v7, v8, v9, t10, v10, sum; - extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, 
integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLARFX applies a real elementary reflector H to a real m by n - matrix C, from either the left or the right. H is represented in the - form - - H = I - tau * v * v' - - where tau is a real scalar and v is a real vector. - - If tau = 0, then H is taken to be the unit matrix - - This version uses inline code if H has order < 11. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - V (input) DOUBLE PRECISION array, dimension (M) if SIDE = 'L' - or (N) if SIDE = 'R' - The vector v in the representation of H. - - TAU (input) DOUBLE PRECISION - The value tau in the representation of H. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. - - LDC (input) INTEGER - The leading dimension of the array C. LDA >= (1,M). - - WORK (workspace) DOUBLE PRECISION array, dimension - (N) if SIDE = 'L' - or (M) if SIDE = 'R' - WORK is not referenced if H has order < 11. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (*tau == 0.) { - return 0; - } - if (lsame_(side, "L")) { - -/* Form H * C, where H has order m. 
*/ - - switch (*m) { - case 1: goto L10; - case 2: goto L30; - case 3: goto L50; - case 4: goto L70; - case 5: goto L90; - case 6: goto L110; - case 7: goto L130; - case 8: goto L150; - case 9: goto L170; - case 10: goto L190; - } - -/* - Code for general M - - w := C'*v -*/ - - dgemv_("Transpose", m, n, &c_b2865, &c__[c_offset], ldc, &v[1], &c__1, - &c_b2879, &work[1], &c__1); - -/* C := C - tau * v * w' */ - - d__1 = -(*tau); - dger_(m, n, &d__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc) - ; - goto L410; -L10: - -/* Special code for 1 x 1 Householder */ - - t1 = 1. - *tau * v[1] * v[1]; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; -/* L20: */ - } - goto L410; -L30: - -/* Special code for 2 x 2 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; -/* L40: */ - } - goto L410; -L50: - -/* Special code for 3 x 3 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; -/* L60: */ - } - goto L410; -L70: - -/* Special code for 4 x 4 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; -/* L80: */ - } - goto L410; -L90: - -/* Special code for 
5 x 5 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; -/* L100: */ - } - goto L410; -L110: - -/* Special code for 6 x 6 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; -/* L120: */ - } - goto L410; -L130: - -/* Special code for 7 x 7 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * - c_dim1 + 7]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * 
t6; - c__[j * c_dim1 + 7] -= sum * t7; -/* L140: */ - } - goto L410; -L150: - -/* Special code for 8 x 8 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * - c_dim1 + 7] + v8 * c__[j * c_dim1 + 8]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; - c__[j * c_dim1 + 7] -= sum * t7; - c__[j * c_dim1 + 8] -= sum * t8; -/* L160: */ - } - goto L410; -L170: - -/* Special code for 9 x 9 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - v9 = v[9]; - t9 = *tau * v9; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * - c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * - c_dim1 + 9]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; - c__[j * c_dim1 + 7] -= sum * t7; - c__[j * c_dim1 + 8] -= sum * t8; - c__[j * c_dim1 + 9] -= sum * t9; -/* L180: */ - } - goto L410; -L190: - -/* Special code for 10 x 10 Householder */ - - v1 = 
v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - v9 = v[9]; - t9 = *tau * v9; - v10 = v[10]; - t10 = *tau * v10; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * - c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * - c_dim1 + 9] + v10 * c__[j * c_dim1 + 10]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; - c__[j * c_dim1 + 7] -= sum * t7; - c__[j * c_dim1 + 8] -= sum * t8; - c__[j * c_dim1 + 9] -= sum * t9; - c__[j * c_dim1 + 10] -= sum * t10; -/* L200: */ - } - goto L410; - } else { - -/* Form C * H, where H has order n. */ - - switch (*n) { - case 1: goto L210; - case 2: goto L230; - case 3: goto L250; - case 4: goto L270; - case 5: goto L290; - case 6: goto L310; - case 7: goto L330; - case 8: goto L350; - case 9: goto L370; - case 10: goto L390; - } - -/* - Code for general N - - w := C * v -*/ - - dgemv_("No transpose", m, n, &c_b2865, &c__[c_offset], ldc, &v[1], & - c__1, &c_b2879, &work[1], &c__1); - -/* C := C - tau * w * v' */ - - d__1 = -(*tau); - dger_(m, n, &d__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc) - ; - goto L410; -L210: - -/* Special code for 1 x 1 Householder */ - - t1 = 1. 
- *tau * v[1] * v[1]; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - c__[j + c_dim1] = t1 * c__[j + c_dim1]; -/* L220: */ - } - goto L410; -L230: - -/* Special code for 2 x 2 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))]; - c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; -/* L240: */ - } - goto L410; -L250: - -/* Special code for 3 x 3 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3]; - c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; -/* L260: */ - } - goto L410; -L270: - -/* Special code for 4 x 4 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))]; - c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + ((c_dim1) << (2))] -= sum * t4; -/* L280: */ - } - goto L410; -L290: - -/* Special code for 5 x 5 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] + - v5 * c__[j + c_dim1 * 5]; - c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + ((c_dim1) << (2))] -= sum * t4; - c__[j + c_dim1 * 5] 
-= sum * t5; -/* L300: */ - } - goto L410; -L310: - -/* Special code for 6 x 6 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] + - v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; - c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + ((c_dim1) << (2))] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; -/* L320: */ - } - goto L410; -L330: - -/* Special code for 7 x 7 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] + - v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * - c__[j + c_dim1 * 7]; - c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + ((c_dim1) << (2))] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; - c__[j + c_dim1 * 7] -= sum * t7; -/* L340: */ - } - goto L410; -L350: - -/* Special code for 8 x 8 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + 
c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] + - v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * - c__[j + c_dim1 * 7] + v8 * c__[j + ((c_dim1) << (3))]; - c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + ((c_dim1) << (2))] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; - c__[j + c_dim1 * 7] -= sum * t7; - c__[j + ((c_dim1) << (3))] -= sum * t8; -/* L360: */ - } - goto L410; -L370: - -/* Special code for 9 x 9 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - v9 = v[9]; - t9 = *tau * v9; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] + - v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * - c__[j + c_dim1 * 7] + v8 * c__[j + ((c_dim1) << (3))] + - v9 * c__[j + c_dim1 * 9]; - c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + ((c_dim1) << (2))] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; - c__[j + c_dim1 * 7] -= sum * t7; - c__[j + ((c_dim1) << (3))] -= sum * t8; - c__[j + c_dim1 * 9] -= sum * t9; -/* L380: */ - } - goto L410; -L390: - -/* Special code for 10 x 10 Householder */ - - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - v8 = v[8]; - t8 = *tau * v8; - v9 = v[9]; - t9 = *tau * v9; - v10 = v[10]; - t10 = *tau * v10; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j 
+ c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] + - v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * - c__[j + c_dim1 * 7] + v8 * c__[j + ((c_dim1) << (3))] + - v9 * c__[j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10]; - c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; - c__[j + c_dim1 * 3] -= sum * t3; - c__[j + ((c_dim1) << (2))] -= sum * t4; - c__[j + c_dim1 * 5] -= sum * t5; - c__[j + c_dim1 * 6] -= sum * t6; - c__[j + c_dim1 * 7] -= sum * t7; - c__[j + ((c_dim1) << (3))] -= sum * t8; - c__[j + c_dim1 * 9] -= sum * t9; - c__[j + c_dim1 * 10] -= sum * t10; -/* L400: */ - } - goto L410; - } -L410: - return 0; - -/* End of DLARFX */ - -} /* dlarfx_ */ - -/* Subroutine */ int dlartg_(doublereal *f, doublereal *g, doublereal *cs, - doublereal *sn, doublereal *r__) -{ - /* Initialized data */ - - static logical first = TRUE_; - - /* System generated locals */ - integer i__1; - doublereal d__1, d__2; - - /* Builtin functions */ - double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal); - - /* Local variables */ - static integer i__; - static doublereal f1, g1, eps, scale; - static integer count; - static doublereal safmn2, safmx2; - - static doublereal safmin; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DLARTG generate a plane rotation so that - - [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. - [ -SN CS ] [ G ] [ 0 ] - - This is a slower, more accurate version of the BLAS1 routine DROTG, - with the following other differences: - F and G are unchanged on return. - If G=0, then CS=1 and SN=0. - If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any - floating point operations (saves work in DBDSQR when - there are zeros on the diagonal). - - If F exceeds G in magnitude, CS will be positive. 
- - Arguments - ========= - - F (input) DOUBLE PRECISION - The first component of vector to be rotated. - - G (input) DOUBLE PRECISION - The second component of vector to be rotated. - - CS (output) DOUBLE PRECISION - The cosine of the rotation. - - SN (output) DOUBLE PRECISION - The sine of the rotation. - - R (output) DOUBLE PRECISION - The nonzero component of the rotated vector. - - ===================================================================== -*/ - - - if (first) { - first = FALSE_; - safmin = SAFEMINIMUM; - eps = EPSILON; - d__1 = BASE; - i__1 = (integer) (log(safmin / eps) / log(BASE) / - 2.); - safmn2 = pow_di(&d__1, &i__1); - safmx2 = 1. / safmn2; - } - if (*g == 0.) { - *cs = 1.; - *sn = 0.; - *r__ = *f; - } else if (*f == 0.) { - *cs = 0.; - *sn = 1.; - *r__ = *g; - } else { - f1 = *f; - g1 = *g; -/* Computing MAX */ - d__1 = abs(f1), d__2 = abs(g1); - scale = max(d__1,d__2); - if (scale >= safmx2) { - count = 0; -L10: - ++count; - f1 *= safmn2; - g1 *= safmn2; -/* Computing MAX */ - d__1 = abs(f1), d__2 = abs(g1); - scale = max(d__1,d__2); - if (scale >= safmx2) { - goto L10; - } -/* Computing 2nd power */ - d__1 = f1; -/* Computing 2nd power */ - d__2 = g1; - *r__ = sqrt(d__1 * d__1 + d__2 * d__2); - *cs = f1 / *r__; - *sn = g1 / *r__; - i__1 = count; - for (i__ = 1; i__ <= i__1; ++i__) { - *r__ *= safmx2; -/* L20: */ - } - } else if (scale <= safmn2) { - count = 0; -L30: - ++count; - f1 *= safmx2; - g1 *= safmx2; -/* Computing MAX */ - d__1 = abs(f1), d__2 = abs(g1); - scale = max(d__1,d__2); - if (scale <= safmn2) { - goto L30; - } -/* Computing 2nd power */ - d__1 = f1; -/* Computing 2nd power */ - d__2 = g1; - *r__ = sqrt(d__1 * d__1 + d__2 * d__2); - *cs = f1 / *r__; - *sn = g1 / *r__; - i__1 = count; - for (i__ = 1; i__ <= i__1; ++i__) { - *r__ *= safmn2; -/* L40: */ - } - } else { -/* Computing 2nd power */ - d__1 = f1; -/* Computing 2nd power */ - d__2 = g1; - *r__ = sqrt(d__1 * d__1 + d__2 * d__2); - *cs = f1 / *r__; - *sn = g1 / *r__; 
- } - if (abs(*f) > abs(*g) && *cs < 0.) { - *cs = -(*cs); - *sn = -(*sn); - *r__ = -(*r__); - } - } - return 0; - -/* End of DLARTG */ - -} /* dlartg_ */ - -/* Subroutine */ int dlas2_(doublereal *f, doublereal *g, doublereal *h__, - doublereal *ssmin, doublereal *ssmax) -{ - /* System generated locals */ - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal c__, fa, ga, ha, as, at, au, fhmn, fhmx; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DLAS2 computes the singular values of the 2-by-2 matrix - [ F G ] - [ 0 H ]. - On return, SSMIN is the smaller singular value and SSMAX is the - larger singular value. - - Arguments - ========= - - F (input) DOUBLE PRECISION - The (1,1) element of the 2-by-2 matrix. - - G (input) DOUBLE PRECISION - The (1,2) element of the 2-by-2 matrix. - - H (input) DOUBLE PRECISION - The (2,2) element of the 2-by-2 matrix. - - SSMIN (output) DOUBLE PRECISION - The smaller singular value. - - SSMAX (output) DOUBLE PRECISION - The larger singular value. - - Further Details - =============== - - Barring over/underflow, all output quantities are correct to within - a few units in the last place (ulps), even in the absence of a guard - digit in addition/subtraction. - - In IEEE arithmetic, the code works correctly if one matrix element is - infinite. - - Overflow will not occur unless the largest singular value itself - overflows, or is within a few ulps of overflow. (On machines with - partial overflow, like the Cray, overflow may occur if the largest - singular value is within a factor of 2 of overflow.) - - Underflow is harmless if underflow is gradual. Otherwise, results - may correspond to a matrix modified by perturbations of size near - the underflow threshold. 
- - ==================================================================== -*/ - - - fa = abs(*f); - ga = abs(*g); - ha = abs(*h__); - fhmn = min(fa,ha); - fhmx = max(fa,ha); - if (fhmn == 0.) { - *ssmin = 0.; - if (fhmx == 0.) { - *ssmax = ga; - } else { -/* Computing 2nd power */ - d__1 = min(fhmx,ga) / max(fhmx,ga); - *ssmax = max(fhmx,ga) * sqrt(d__1 * d__1 + 1.); - } - } else { - if (ga < fhmx) { - as = fhmn / fhmx + 1.; - at = (fhmx - fhmn) / fhmx; -/* Computing 2nd power */ - d__1 = ga / fhmx; - au = d__1 * d__1; - c__ = 2. / (sqrt(as * as + au) + sqrt(at * at + au)); - *ssmin = fhmn * c__; - *ssmax = fhmx / c__; - } else { - au = fhmx / ga; - if (au == 0.) { - -/* - Avoid possible harmful underflow if exponent range - asymmetric (true SSMIN may not underflow even if - AU underflows) -*/ - - *ssmin = fhmn * fhmx / ga; - *ssmax = ga; - } else { - as = fhmn / fhmx + 1.; - at = (fhmx - fhmn) / fhmx; -/* Computing 2nd power */ - d__1 = as * au; -/* Computing 2nd power */ - d__2 = at * au; - c__ = 1. / (sqrt(d__1 * d__1 + 1.) + sqrt(d__2 * d__2 + 1.)); - *ssmin = fhmn * c__ * au; - *ssmin += *ssmin; - *ssmax = ga / (c__ + c__); - } - } - } - return 0; - -/* End of DLAS2 */ - -} /* dlas2_ */ - -/* Subroutine */ int dlascl_(char *type__, integer *kl, integer *ku, - doublereal *cfrom, doublereal *cto, integer *m, integer *n, - doublereal *a, integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - - /* Local variables */ - static integer i__, j, k1, k2, k3, k4; - static doublereal mul, cto1; - static logical done; - static doublereal ctoc; - extern logical lsame_(char *, char *); - static integer itype; - static doublereal cfrom1; - - static doublereal cfromc; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal bignum, smlnum; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLASCL multiplies the M by N real matrix A by the real scalar - CTO/CFROM. This is done without over/underflow as long as the final - result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that - A may be full, upper triangular, lower triangular, upper Hessenberg, - or banded. - - Arguments - ========= - - TYPE (input) CHARACTER*1 - TYPE indices the storage type of the input matrix. - = 'G': A is a full matrix. - = 'L': A is a lower triangular matrix. - = 'U': A is an upper triangular matrix. - = 'H': A is an upper Hessenberg matrix. - = 'B': A is a symmetric band matrix with lower bandwidth KL - and upper bandwidth KU and with the only the lower - half stored. - = 'Q': A is a symmetric band matrix with lower bandwidth KL - and upper bandwidth KU and with the only the upper - half stored. - = 'Z': A is a band matrix with lower bandwidth KL and upper - bandwidth KU. - - KL (input) INTEGER - The lower bandwidth of A. Referenced only if TYPE = 'B', - 'Q' or 'Z'. - - KU (input) INTEGER - The upper bandwidth of A. Referenced only if TYPE = 'B', - 'Q' or 'Z'. - - CFROM (input) DOUBLE PRECISION - CTO (input) DOUBLE PRECISION - The matrix A is multiplied by CTO/CFROM. A(I,J) is computed - without over/underflow if the final result CTO*A(I,J)/CFROM - can be represented without over/underflow. CFROM must be - nonzero. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,M) - The matrix to be multiplied by CTO/CFROM. See TYPE for the - storage type. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - INFO (output) INTEGER - 0 - successful exit - <0 - if INFO = -i, the i-th argument had an illegal value. 
- - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - - if (lsame_(type__, "G")) { - itype = 0; - } else if (lsame_(type__, "L")) { - itype = 1; - } else if (lsame_(type__, "U")) { - itype = 2; - } else if (lsame_(type__, "H")) { - itype = 3; - } else if (lsame_(type__, "B")) { - itype = 4; - } else if (lsame_(type__, "Q")) { - itype = 5; - } else if (lsame_(type__, "Z")) { - itype = 6; - } else { - itype = -1; - } - - if (itype == -1) { - *info = -1; - } else if (*cfrom == 0.) { - *info = -4; - } else if (*m < 0) { - *info = -6; - } else if (((*n < 0) || (itype == 4 && *n != *m)) || (itype == 5 && *n != - *m)) { - *info = -7; - } else if (itype <= 3 && *lda < max(1,*m)) { - *info = -9; - } else if (itype >= 4) { -/* Computing MAX */ - i__1 = *m - 1; - if ((*kl < 0) || (*kl > max(i__1,0))) { - *info = -2; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = *n - 1; - if (((*ku < 0) || (*ku > max(i__1,0))) || (((itype == 4) || ( - itype == 5)) && *kl != *ku)) { - *info = -3; - } else if (((itype == 4 && *lda < *kl + 1) || (itype == 5 && *lda - < *ku + 1)) || (itype == 6 && *lda < ((*kl) << (1)) + *ku - + 1)) { - *info = -9; - } - } - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASCL", &i__1); - return 0; - } - -/* Quick return if possible */ - - if ((*n == 0) || (*m == 0)) { - return 0; - } - -/* Get machine parameters */ - - smlnum = SAFEMINIMUM; - bignum = 1. / smlnum; - - cfromc = *cfrom; - ctoc = *cto; - -L10: - cfrom1 = cfromc * smlnum; - cto1 = ctoc / bignum; - if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) 
{ - mul = smlnum; - done = FALSE_; - cfromc = cfrom1; - } else if (abs(cto1) > abs(cfromc)) { - mul = bignum; - done = FALSE_; - ctoc = cto1; - } else { - mul = ctoc / cfromc; - done = TRUE_; - } - - if (itype == 0) { - -/* Full matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L20: */ - } -/* L30: */ - } - - } else if (itype == 1) { - -/* Lower triangular matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L40: */ - } -/* L50: */ - } - - } else if (itype == 2) { - -/* Upper triangular matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L60: */ - } -/* L70: */ - } - - } else if (itype == 3) { - -/* Upper Hessenberg matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = j + 1; - i__2 = min(i__3,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L80: */ - } -/* L90: */ - } - - } else if (itype == 4) { - -/* Lower half of a symmetric band matrix */ - - k3 = *kl + 1; - k4 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = k3, i__4 = k4 - j; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L100: */ - } -/* L110: */ - } - - } else if (itype == 5) { - -/* Upper half of a symmetric band matrix */ - - k1 = *ku + 2; - k3 = *ku + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__2 = k1 - j; - i__3 = k3; - for (i__ = max(i__2,1); i__ <= i__3; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L120: */ - } -/* L130: */ - } - - } else if (itype == 6) { - -/* Band matrix */ - - k1 = *kl + *ku + 2; - k2 = *kl + 1; - k3 = ((*kl) << (1)) + *ku + 1; - k4 = *kl + *ku + 1 + *m; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MAX */ - i__3 = k1 - j; 
-/* Computing MIN */ - i__4 = k3, i__5 = k4 - j; - i__2 = min(i__4,i__5); - for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] *= mul; -/* L140: */ - } -/* L150: */ - } - - } - - if (! done) { - goto L10; - } - - return 0; - -/* End of DLASCL */ - -} /* dlascl_ */ - -/* Subroutine */ int dlasd0_(integer *n, integer *sqre, doublereal *d__, - doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer * - ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer * - info) -{ - /* System generated locals */ - integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, - iwk, lvl, ndb1, nlp1, nrp1; - static doublereal beta; - static integer idxq, nlvl; - static doublereal alpha; - static integer inode, ndiml, idxqc, ndimr, itemp, sqrei; - extern /* Subroutine */ int dlasd1_(integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - doublereal *, integer *, integer *, integer *, doublereal *, - integer *), dlasdq_(char *, integer *, integer *, integer *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *), dlasdt_(integer *, integer *, - integer *, integer *, integer *, integer *, integer *), xerbla_( - char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - Using a divide and conquer approach, DLASD0 computes the singular - value decomposition (SVD) of a real upper bidiagonal N-by-M - matrix B with diagonal D and offdiagonal E, where M = N + SQRE. - The algorithm computes orthogonal matrices U and VT such that - B = U * S * VT. 
The singular values S are overwritten on D. - - A related subroutine, DLASDA, computes only the singular values, - and optionally, the singular vectors in compact form. - - Arguments - ========= - - N (input) INTEGER - On entry, the row dimension of the upper bidiagonal matrix. - This is also the dimension of the main diagonal array D. - - SQRE (input) INTEGER - Specifies the column dimension of the bidiagonal matrix. - = 0: The bidiagonal matrix has column dimension M = N; - = 1: The bidiagonal matrix has column dimension M = N+1; - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry D contains the main diagonal of the bidiagonal - matrix. - On exit D, if INFO = 0, contains its singular values. - - E (input) DOUBLE PRECISION array, dimension (M-1) - Contains the subdiagonal entries of the bidiagonal matrix. - On exit, E has been destroyed. - - U (output) DOUBLE PRECISION array, dimension at least (LDQ, N) - On exit, U contains the left singular vectors. - - LDU (input) INTEGER - On entry, leading dimension of U. - - VT (output) DOUBLE PRECISION array, dimension at least (LDVT, M) - On exit, VT' contains the right singular vectors. - - LDVT (input) INTEGER - On entry, leading dimension of VT. - - SMLSIZ (input) INTEGER - On entry, maximum size of the subproblems at the - bottom of the computation tree. - - IWORK INTEGER work array. - Dimension must be at least (8 * N) - - WORK DOUBLE PRECISION work array. - Dimension must be at least (3 * M**2 + 2 * M) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - --e; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --iwork; - --work; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -1; - } else if ((*sqre < 0) || (*sqre > 1)) { - *info = -2; - } - - m = *n + *sqre; - - if (*ldu < *n) { - *info = -6; - } else if (*ldvt < m) { - *info = -8; - } else if (*smlsiz < 3) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD0", &i__1); - return 0; - } - -/* If the input matrix is too small, call DLASDQ to find the SVD. */ - - if (*n <= *smlsiz) { - dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], - ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); - return 0; - } - -/* Set up the computation tree. */ - - inode = 1; - ndiml = inode + *n; - ndimr = ndiml + *n; - idxq = ndimr + *n; - iwk = idxq + *n; - dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], - smlsiz); - -/* - For the nodes on bottom level of the tree, solve - their subproblems by DLASDQ. 
-*/ - - ndb1 = (nd + 1) / 2; - ncc = 0; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - -/* - IC : center row of each node - NL : number of rows of left subproblem - NR : number of rows of right subproblem - NLF: starting row of the left subproblem - NRF: starting row of the right subproblem -*/ - - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nlp1 = nl + 1; - nr = iwork[ndimr + i1]; - nrp1 = nr + 1; - nlf = ic - nl; - nrf = ic + 1; - sqrei = 1; - dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ - nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[ - nlf + nlf * u_dim1], ldu, &work[1], info); - if (*info != 0) { - return 0; - } - itemp = idxq + nlf - 2; - i__2 = nl; - for (j = 1; j <= i__2; ++j) { - iwork[itemp + j] = j; -/* L10: */ - } - if (i__ == nd) { - sqrei = *sqre; - } else { - sqrei = 1; - } - nrp1 = nr + sqrei; - dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ - nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[ - nrf + nrf * u_dim1], ldu, &work[1], info); - if (*info != 0) { - return 0; - } - itemp = idxq + ic; - i__2 = nr; - for (j = 1; j <= i__2; ++j) { - iwork[itemp + j - 1] = j; -/* L20: */ - } -/* L30: */ - } - -/* Now conquer each subproblem bottom-up. */ - - for (lvl = nlvl; lvl >= 1; --lvl) { - -/* - Find the first node LF and last node LL on the - current level LVL. 
-*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); - ll = ((lf) << (1)) - 1; - } - i__1 = ll; - for (i__ = lf; i__ <= i__1; ++i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - if (*sqre == 0 && i__ == ll) { - sqrei = *sqre; - } else { - sqrei = 1; - } - idxqc = idxq + nlf - 1; - alpha = d__[ic]; - beta = e[ic]; - dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * - u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[ - idxqc], &iwork[iwk], &work[1], info); - if (*info != 0) { - return 0; - } -/* L40: */ - } -/* L50: */ - } - - return 0; - -/* End of DLASD0 */ - -} /* dlasd0_ */ - -/* Subroutine */ int dlasd1_(integer *nl, integer *nr, integer *sqre, - doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u, - integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer * - iwork, doublereal *work, integer *info) -{ - /* System generated locals */ - integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; - doublereal d__1, d__2; - - /* Local variables */ - static integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, - idxc, idxp, ldvt2; - extern /* Subroutine */ int dlasd2_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, integer *, - integer *, integer *, integer *, integer *, integer *), dlasd3_( - integer *, integer *, integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, doublereal *, integer *), - dlascl_(char *, integer *, integer *, doublereal *, doublereal *, - integer *, integer *, doublereal *, integer *, integer *), - dlamrg_(integer *, integer *, doublereal *, integer *, 
integer *, - integer *); - static integer isigma; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal orgnrm; - static integer coltyp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, - where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. - - A related subroutine DLASD7 handles the case in which the singular - values (and the singular vectors in factored form) are desired. - - DLASD1 computes the SVD as follows: - - ( D1(in) 0 0 0 ) - B = U(in) * ( Z1' a Z2' b ) * VT(in) - ( 0 0 D2(in) 0 ) - - = U(out) * ( D(out) 0) * VT(out) - - where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M - with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros - elsewhere; and the entry b is empty if SQRE = 0. - - The left singular vectors of the original matrix are stored in U, and - the transpose of the right singular vectors are stored in VT, and the - singular values are in D. The algorithm consists of three stages: - - The first stage consists of deflating the size of the problem - when there are multiple singular values or when there are zeros in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine DLASD2. - - The second stage consists of calculating the updated - singular values. This is done by finding the square roots of the - roots of the secular equation via the routine DLASD4 (as called - by DLASD3). This routine also calculates the singular vectors of - the current problem. - - The final stage consists of computing the updated singular vectors - directly using the updated singular values. 
The singular vectors - for the current problem are multiplied with the singular vectors - from the overall problem. - - Arguments - ========= - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has row dimension N = NL + NR + 1, - and column dimension M = N + SQRE. - - D (input/output) DOUBLE PRECISION array, - dimension (N = NL+NR+1). - On entry D(1:NL,1:NL) contains the singular values of the - upper block; and D(NL+2:N) contains the singular values of - the lower block. On exit D(1:N) contains the singular values - of the modified matrix. - - ALPHA (input) DOUBLE PRECISION - Contains the diagonal element associated with the added row. - - BETA (input) DOUBLE PRECISION - Contains the off-diagonal element associated with the added - row. - - U (input/output) DOUBLE PRECISION array, dimension(LDU,N) - On entry U(1:NL, 1:NL) contains the left singular vectors of - the upper block; U(NL+2:N, NL+2:N) contains the left singular - vectors of the lower block. On exit U contains the left - singular vectors of the bidiagonal matrix. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= max( 1, N ). - - VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) - where M = N + SQRE. - On entry VT(1:NL+1, 1:NL+1)' contains the right singular - vectors of the upper block; VT(NL+2:M, NL+2:M)' contains - the right singular vectors of the lower block. On exit - VT' contains the right singular vectors of the - bidiagonal matrix. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= max( 1, M ). - - IDXQ (output) INTEGER array, dimension(N) - This contains the permutation which will reintegrate the - subproblem just solved back into sorted order, i.e. 
- D( IDXQ( I = 1, N ) ) will be in ascending order. - - IWORK (workspace) INTEGER array, dimension( 4 * N ) - - WORK (workspace) DOUBLE PRECISION array, dimension( 3*M**2 + 2*M ) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --idxq; - --iwork; - --work; - - /* Function Body */ - *info = 0; - - if (*nl < 1) { - *info = -1; - } else if (*nr < 1) { - *info = -2; - } else if ((*sqre < 0) || (*sqre > 1)) { - *info = -3; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD1", &i__1); - return 0; - } - - n = *nl + *nr + 1; - m = n + *sqre; - -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in DLASD2 and DLASD3. -*/ - - ldu2 = n; - ldvt2 = m; - - iz = 1; - isigma = iz + m; - iu2 = isigma + n; - ivt2 = iu2 + ldu2 * n; - iq = ivt2 + ldvt2 * m; - - idx = 1; - idxc = idx + n; - coltyp = idxc + n; - idxp = coltyp + n; - -/* - Scale. - - Computing MAX -*/ - d__1 = abs(*alpha), d__2 = abs(*beta); - orgnrm = max(d__1,d__2); - d__[*nl + 1] = 0.; - i__1 = n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { - orgnrm = (d__1 = d__[i__], abs(d__1)); - } -/* L10: */ - } - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, &n, &c__1, &d__[1], &n, - info); - *alpha /= orgnrm; - *beta /= orgnrm; - -/* Deflate singular values. 
*/ - - dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], - ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, & - work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], & - idxq[1], &iwork[coltyp], info); - -/* Solve Secular Equation and update singular vectors. */ - - ldq = k; - dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ - u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ - ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info); - if (*info != 0) { - return 0; - } - -/* Unscale. */ - - dlascl_("G", &c__0, &c__0, &c_b2865, &orgnrm, &n, &c__1, &d__[1], &n, - info); - -/* Prepare the IDXQ sorting permutation. */ - - n1 = k; - n2 = n - k; - dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); - - return 0; - -/* End of DLASD1 */ - -} /* dlasd1_ */ - -/* Subroutine */ int dlasd2_(integer *nl, integer *nr, integer *sqre, integer - *k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal * - beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, - doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, - integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer * - idxq, integer *coltyp, integer *info) -{ - /* System generated locals */ - integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, - vt2_dim1, vt2_offset, i__1; - doublereal d__1, d__2; - - /* Local variables */ - static doublereal c__; - static integer i__, j, m, n; - static doublereal s; - static integer k2; - static doublereal z1; - static integer ct, jp; - static doublereal eps, tau, tol; - static integer psm[4], nlp1, nlp2, idxi, idxj; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer ctot[4], idxjp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer jprev; - - extern /* Subroutine */ int dlamrg_(integer *, 
integer *, doublereal *, - integer *, integer *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, - doublereal *, doublereal *, integer *), xerbla_(char *, - integer *); - static doublereal hlftol; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASD2 merges the two sets of singular values together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - singular values are close together or if there is a tiny entry in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - DLASD2 is called from DLASD1. - - Arguments - ========= - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has N = NL + NR + 1 rows and - M = N + SQRE >= N columns. - - K (output) INTEGER - Contains the dimension of the non-deflated matrix, - This is the order of the related secular equation. 1 <= K <=N. - - D (input/output) DOUBLE PRECISION array, dimension(N) - On entry D contains the singular values of the two submatrices - to be combined. On exit D contains the trailing (N-K) updated - singular values (those which were deflated) sorted into - increasing order. - - ALPHA (input) DOUBLE PRECISION - Contains the diagonal element associated with the added row. - - BETA (input) DOUBLE PRECISION - Contains the off-diagonal element associated with the added - row. 
- - U (input/output) DOUBLE PRECISION array, dimension(LDU,N) - On entry U contains the left singular vectors of two - submatrices in the two square blocks with corners at (1,1), - (NL, NL), and (NL+2, NL+2), (N,N). - On exit U contains the trailing (N-K) updated left singular - vectors (those which were deflated) in its last N-K columns. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= N. - - Z (output) DOUBLE PRECISION array, dimension(N) - On exit Z contains the updating row vector in the secular - equation. - - DSIGMA (output) DOUBLE PRECISION array, dimension (N) - Contains a copy of the diagonal elements (K-1 singular values - and one zero) in the secular equation. - - U2 (output) DOUBLE PRECISION array, dimension(LDU2,N) - Contains a copy of the first K-1 left singular vectors which - will be used by DLASD3 in a matrix multiply (DGEMM) to solve - for the new left singular vectors. U2 is arranged into four - blocks. The first block contains a column with 1 at NL+1 and - zero everywhere else; the second block contains non-zero - entries only at and above NL; the third contains non-zero - entries only below NL+1; and the fourth is dense. - - LDU2 (input) INTEGER - The leading dimension of the array U2. LDU2 >= N. - - VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) - On entry VT' contains the right singular vectors of two - submatrices in the two square blocks with corners at (1,1), - (NL+1, NL+1), and (NL+2, NL+2), (M,M). - On exit VT' contains the trailing (N-K) updated right singular - vectors (those which were deflated) in its last N-K columns. - In case SQRE =1, the last row of VT spans the right null - space. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= M. - - VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N) - VT2' contains a copy of the first K right singular vectors - which will be used by DLASD3 in a matrix multiply (DGEMM) to - solve for the new right singular vectors. 
VT2 is arranged into - three blocks. The first block contains a row that corresponds - to the special 0 diagonal element in SIGMA; the second block - contains non-zeros only at and before NL +1; the third block - contains non-zeros only at and after NL +2. - - LDVT2 (input) INTEGER - The leading dimension of the array VT2. LDVT2 >= M. - - IDXP (workspace) INTEGER array, dimension(N) - This will contain the permutation used to place deflated - values of D at the end of the array. On output IDXP(2:K) - points to the nondeflated D-values and IDXP(K+1:N) - points to the deflated singular values. - - IDX (workspace) INTEGER array, dimension(N) - This will contain the permutation used to sort the contents of - D into ascending order. - - IDXC (output) INTEGER array, dimension(N) - This will contain the permutation used to arrange the columns - of the deflated U matrix into three groups: the first group - contains non-zero entries only at and above NL, the second - contains non-zero entries only below NL+2, and the third is - dense. - - COLTYP (workspace/output) INTEGER array, dimension(N) - As workspace, this will contain a label which will indicate - which of the following types a column in the U2 matrix or a - row in the VT2 matrix is: - 1 : non-zero in the upper half only - 2 : non-zero in the lower half only - 3 : dense - 4 : deflated - - On exit, it is an array of dimension 4, with COLTYP(I) being - the dimension of the I-th type columns. - - IDXQ (input) INTEGER array, dimension(N) - This contains the permutation which separately sorts the two - sub-problems in D into ascending order. Note that entries in - the first hlaf of this permutation must first be moved one - position backward; and entries in the second half - must first have NL+1 added to their values. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. 
- - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --z__; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --dsigma; - u2_dim1 = *ldu2; - u2_offset = 1 + u2_dim1; - u2 -= u2_offset; - vt2_dim1 = *ldvt2; - vt2_offset = 1 + vt2_dim1; - vt2 -= vt2_offset; - --idxp; - --idx; - --idxc; - --idxq; - --coltyp; - - /* Function Body */ - *info = 0; - - if (*nl < 1) { - *info = -1; - } else if (*nr < 1) { - *info = -2; - } else if (*sqre != 1 && *sqre != 0) { - *info = -3; - } - - n = *nl + *nr + 1; - m = n + *sqre; - - if (*ldu < n) { - *info = -10; - } else if (*ldvt < m) { - *info = -12; - } else if (*ldu2 < n) { - *info = -15; - } else if (*ldvt2 < m) { - *info = -17; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD2", &i__1); - return 0; - } - - nlp1 = *nl + 1; - nlp2 = *nl + 2; - -/* - Generate the first part of the vector Z; and move the singular - values in the first part of D one position backward. -*/ - - z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; - z__[1] = z1; - for (i__ = *nl; i__ >= 1; --i__) { - z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; - d__[i__ + 1] = d__[i__]; - idxq[i__ + 1] = idxq[i__] + 1; -/* L10: */ - } - -/* Generate the second part of the vector Z. */ - - i__1 = m; - for (i__ = nlp2; i__ <= i__1; ++i__) { - z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; -/* L20: */ - } - -/* Initialize some reference arrays. 
*/ - - i__1 = nlp1; - for (i__ = 2; i__ <= i__1; ++i__) { - coltyp[i__] = 1; -/* L30: */ - } - i__1 = n; - for (i__ = nlp2; i__ <= i__1; ++i__) { - coltyp[i__] = 2; -/* L40: */ - } - -/* Sort the singular values into increasing order */ - - i__1 = n; - for (i__ = nlp2; i__ <= i__1; ++i__) { - idxq[i__] += nlp1; -/* L50: */ - } - -/* - DSIGMA, IDXC, IDXC, and the first column of U2 - are used as storage space. -*/ - - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - dsigma[i__] = d__[idxq[i__]]; - u2[i__ + u2_dim1] = z__[idxq[i__]]; - idxc[i__] = coltyp[idxq[i__]]; -/* L60: */ - } - - dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); - - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - idxi = idx[i__] + 1; - d__[i__] = dsigma[idxi]; - z__[i__] = u2[idxi + u2_dim1]; - coltyp[i__] = idxc[idxi]; -/* L70: */ - } - -/* Calculate the allowable deflation tolerance */ - - eps = EPSILON; -/* Computing MAX */ - d__1 = abs(*alpha), d__2 = abs(*beta); - tol = max(d__1,d__2); -/* Computing MAX */ - d__2 = (d__1 = d__[n], abs(d__1)); - tol = eps * 8. * max(d__2,tol); - -/* - There are 2 kinds of deflation -- first a value in the z-vector - is small, second two (or more) singular values are very close - together (their difference is small). - - If the value in the z-vector is small, we simply permute the - array so that the corresponding singular value is moved to the - end. - - If two values in the D-vector are close, we perform a two-sided - rotation designed to make one of the corresponding z-vector - entries zero, and then permute the array so that the deflated - singular value is moved to the end. - - If there are multiple singular values then the problem deflates. - Here the number of equal singular values are found. As each equal - singular value is found, an elementary reflector is computed to - rotate the corresponding singular subspace so that the - corresponding components of Z are zero in this new basis. 
-*/ - - *k = 1; - k2 = n + 1; - i__1 = n; - for (j = 2; j <= i__1; ++j) { - if ((d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - idxp[k2] = j; - coltyp[j] = 4; - if (j == n) { - goto L120; - } - } else { - jprev = j; - goto L90; - } -/* L80: */ - } -L90: - j = jprev; -L100: - ++j; - if (j > n) { - goto L110; - } - if ((d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - idxp[k2] = j; - coltyp[j] = 4; - } else { - -/* Check if singular values are close enough to allow deflation. */ - - if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { - -/* Deflation is possible. */ - - s = z__[jprev]; - c__ = z__[j]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = dlapy2_(&c__, &s); - c__ /= tau; - s = -s / tau; - z__[j] = tau; - z__[jprev] = 0.; - -/* - Apply back the Givens rotation to the left and right - singular vector matrices. -*/ - - idxjp = idxq[idx[jprev] + 1]; - idxj = idxq[idx[j] + 1]; - if (idxjp <= nlp1) { - --idxjp; - } - if (idxj <= nlp1) { - --idxj; - } - drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & - c__1, &c__, &s); - drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & - c__, &s); - if (coltyp[j] != coltyp[jprev]) { - coltyp[j] = 3; - } - coltyp[jprev] = 4; - --k2; - idxp[k2] = jprev; - jprev = j; - } else { - ++(*k); - u2[*k + u2_dim1] = z__[jprev]; - dsigma[*k] = d__[jprev]; - idxp[*k] = jprev; - jprev = j; - } - } - goto L100; -L110: - -/* Record the last singular value. */ - - ++(*k); - u2[*k + u2_dim1] = z__[jprev]; - dsigma[*k] = d__[jprev]; - idxp[*k] = jprev; - -L120: - -/* - Count up the total number of the various types of columns, then - form a permutation which positions the four column types into - four groups of uniform structure (although one or more of these - groups may be empty). 
-*/ - - for (j = 1; j <= 4; ++j) { - ctot[j - 1] = 0; -/* L130: */ - } - i__1 = n; - for (j = 2; j <= i__1; ++j) { - ct = coltyp[j]; - ++ctot[ct - 1]; -/* L140: */ - } - -/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ - - psm[0] = 2; - psm[1] = ctot[0] + 2; - psm[2] = psm[1] + ctot[1]; - psm[3] = psm[2] + ctot[2]; - -/* - Fill out the IDXC array so that the permutation which it induces - will place all type-1 columns first, all type-2 columns next, - then all type-3's, and finally all type-4's, starting from the - second column. This applies similarly to the rows of VT. -*/ - - i__1 = n; - for (j = 2; j <= i__1; ++j) { - jp = idxp[j]; - ct = coltyp[jp]; - idxc[psm[ct - 1]] = j; - ++psm[ct - 1]; -/* L150: */ - } - -/* - Sort the singular values and corresponding singular vectors into - DSIGMA, U2, and VT2 respectively. The singular values/vectors - which were not deflated go into the first K slots of DSIGMA, U2, - and VT2 respectively, while those which were deflated go into the - last N - K slots, except that the first column/row will be treated - separately. -*/ - - i__1 = n; - for (j = 2; j <= i__1; ++j) { - jp = idxp[j]; - dsigma[j] = d__[jp]; - idxj = idxq[idx[idxp[idxc[j]]] + 1]; - if (idxj <= nlp1) { - --idxj; - } - dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); - dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); -/* L160: */ - } - -/* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ - - dsigma[1] = 0.; - hlftol = tol / 2.; - if (abs(dsigma[2]) <= hlftol) { - dsigma[2] = hlftol; - } - if (m > n) { - z__[1] = dlapy2_(&z1, &z__[m]); - if (z__[1] <= tol) { - c__ = 1.; - s = 0.; - z__[1] = tol; - } else { - c__ = z1 / z__[1]; - s = z__[m] / z__[1]; - } - } else { - if (abs(z1) <= tol) { - z__[1] = tol; - } else { - z__[1] = z1; - } - } - -/* Move the rest of the updating row to Z. 
*/ - - i__1 = *k - 1; - dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); - -/* - Determine the first column of U2, the first row of VT2 and the - last row of VT. -*/ - - dlaset_("A", &n, &c__1, &c_b2879, &c_b2879, &u2[u2_offset], ldu2); - u2[nlp1 + u2_dim1] = 1.; - if (m > n) { - i__1 = nlp1; - for (i__ = 1; i__ <= i__1; ++i__) { - vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; - vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; -/* L170: */ - } - i__1 = m; - for (i__ = nlp2; i__ <= i__1; ++i__) { - vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; - vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; -/* L180: */ - } - } else { - dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); - } - if (m > n) { - dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); - } - -/* - The deflated singular values and their corresponding vectors go - into the back of D, U, and V respectively. -*/ - - if (n > *k) { - i__1 = n - *k; - dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); - i__1 = n - *k; - dlacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) - * u_dim1 + 1], ldu); - i__1 = n - *k; - dlacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + - vt_dim1], ldvt); - } - -/* Copy CTOT into COLTYP for referencing in DLASD3. 
*/ - - for (j = 1; j <= 4; ++j) { - coltyp[j] = ctot[j - 1]; -/* L190: */ - } - - return 0; - -/* End of DLASD2 */ - -} /* dlasd2_ */ - -/* Subroutine */ int dlasd3_(integer *nl, integer *nr, integer *sqre, integer - *k, doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma, - doublereal *u, integer *ldu, doublereal *u2, integer *ldu2, - doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, - integer *idxc, integer *ctot, doublereal *z__, integer *info) -{ - /* System generated locals */ - integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, - vt_offset, vt2_dim1, vt2_offset, i__1, i__2; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer i__, j, m, n, jc; - static doublereal rho; - static integer nlp1, nlp2, nrp1; - static doublereal temp; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer ctemp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer ktemp; - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *), dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlacpy_(char *, integer *, integer - *, doublereal *, integer *, doublereal *, integer *), - xerbla_(char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. 
of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASD3 finds all the square roots of the roots of the secular - equation, as defined by the values in D and Z. It makes the - appropriate calls to DLASD4 and then updates the singular - vectors by matrix multiplication. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - DLASD3 is called from DLASD1. - - Arguments - ========= - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has N = NL + NR + 1 rows and - M = N + SQRE >= N columns. - - K (input) INTEGER - The size of the secular equation, 1 =< K = < N. - - D (output) DOUBLE PRECISION array, dimension(K) - On exit the square roots of the roots of the secular equation, - in ascending order. - - Q (workspace) DOUBLE PRECISION array, - dimension at least (LDQ,K). - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= K. - - DSIGMA (input) DOUBLE PRECISION array, dimension(K) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. - - U (input) DOUBLE PRECISION array, dimension (LDU, N) - The last N - K columns of this matrix contain the deflated - left singular vectors. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= N. 
- - U2 (input) DOUBLE PRECISION array, dimension (LDU2, N) - The first K columns of this matrix contain the non-deflated - left singular vectors for the split problem. - - LDU2 (input) INTEGER - The leading dimension of the array U2. LDU2 >= N. - - VT (input) DOUBLE PRECISION array, dimension (LDVT, M) - The last M - K columns of VT' contain the deflated - right singular vectors. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= N. - - VT2 (input) DOUBLE PRECISION array, dimension (LDVT2, N) - The first K columns of VT2' contain the non-deflated - right singular vectors for the split problem. - - LDVT2 (input) INTEGER - The leading dimension of the array VT2. LDVT2 >= N. - - IDXC (input) INTEGER array, dimension ( N ) - The permutation used to arrange the columns of U (and rows of - VT) into three groups: the first group contains non-zero - entries only at and above (or before) NL +1; the second - contains non-zero entries only at and below (or after) NL+2; - and the third is dense. The first column of U and the row of - VT are treated separately, however. - - The rows of the singular vectors found by DLASD4 - must be likewise permuted before the matrix multiplies can - take place. - - CTOT (input) INTEGER array, dimension ( 4 ) - A count of the total number of the various types of columns - in U (or rows in VT), as described in IDXC. The fourth column - type is any column which has been deflated. - - Z (input) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the components - of the deflation-adjusted updating row vector. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. 
- > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --dsigma; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - u2_dim1 = *ldu2; - u2_offset = 1 + u2_dim1; - u2 -= u2_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - vt2_dim1 = *ldvt2; - vt2_offset = 1 + vt2_dim1; - vt2 -= vt2_offset; - --idxc; - --ctot; - --z__; - - /* Function Body */ - *info = 0; - - if (*nl < 1) { - *info = -1; - } else if (*nr < 1) { - *info = -2; - } else if (*sqre != 1 && *sqre != 0) { - *info = -3; - } - - n = *nl + *nr + 1; - m = n + *sqre; - nlp1 = *nl + 1; - nlp2 = *nl + 2; - - if ((*k < 1) || (*k > n)) { - *info = -4; - } else if (*ldq < *k) { - *info = -7; - } else if (*ldu < n) { - *info = -10; - } else if (*ldu2 < n) { - *info = -12; - } else if (*ldvt < m) { - *info = -14; - } else if (*ldvt2 < m) { - *info = -16; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD3", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*k == 1) { - d__[1] = abs(z__[1]); - dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); - if (z__[1] > 0.) { - dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); - } else { - i__1 = n; - for (i__ = 1; i__ <= i__1; ++i__) { - u[i__ + u_dim1] = -u2[i__ + u2_dim1]; -/* L10: */ - } - } - return 0; - } - -/* - Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can - be computed with high relative accuracy (barring over/underflow). - This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). 
- The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), - which on any of these machines zeros out the bottommost - bit of DSIGMA(I) if it is 1; this makes the subsequent - subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DSIGMA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DSIGMA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. -*/ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; -/* L20: */ - } - -/* Keep a copy of Z. */ - - dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); - -/* Normalize Z. */ - - rho = dnrm2_(k, &z__[1], &c__1); - dlascl_("G", &c__0, &c__0, &rho, &c_b2865, k, &c__1, &z__[1], k, info); - rho *= rho; - -/* Find the new singular values. */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], - &vt[j * vt_dim1 + 1], info); - -/* If the zero finder fails, the computation is terminated. */ - - if (*info != 0) { - return 0; - } -/* L30: */ - } - -/* Compute updated Z. 
*/ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ - i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]); -/* L40: */ - } - i__2 = *k - 1; - for (j = i__; j <= i__2; ++j) { - z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ - i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); -/* L50: */ - } - d__2 = sqrt((d__1 = z__[i__], abs(d__1))); - z__[i__] = d_sign(&d__2, &q[i__ + q_dim1]); -/* L60: */ - } - -/* - Compute left singular vectors of the modified diagonal matrix, - and store related information for the right singular vectors. -*/ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * - vt_dim1 + 1]; - u[i__ * u_dim1 + 1] = -1.; - i__2 = *k; - for (j = 2; j <= i__2; ++j) { - vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ - * vt_dim1]; - u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; -/* L70: */ - } - temp = dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1); - q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; - i__2 = *k; - for (j = 2; j <= i__2; ++j) { - jc = idxc[j]; - q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; -/* L80: */ - } -/* L90: */ - } - -/* Update the left singular vector matrix. 
*/ - - if (*k == 2) { - dgemm_("N", "N", &n, k, k, &c_b2865, &u2[u2_offset], ldu2, &q[ - q_offset], ldq, &c_b2879, &u[u_offset], ldu); - goto L100; - } - if (ctot[1] > 0) { - dgemm_("N", "N", nl, k, &ctot[1], &c_b2865, &u2[((u2_dim1) << (1)) + - 1], ldu2, &q[q_dim1 + 2], ldq, &c_b2879, &u[u_dim1 + 1], ldu); - if (ctot[3] > 0) { - ktemp = ctot[1] + 2 + ctot[2]; - dgemm_("N", "N", nl, k, &ctot[3], &c_b2865, &u2[ktemp * u2_dim1 + - 1], ldu2, &q[ktemp + q_dim1], ldq, &c_b2865, &u[u_dim1 + - 1], ldu); - } - } else if (ctot[3] > 0) { - ktemp = ctot[1] + 2 + ctot[2]; - dgemm_("N", "N", nl, k, &ctot[3], &c_b2865, &u2[ktemp * u2_dim1 + 1], - ldu2, &q[ktemp + q_dim1], ldq, &c_b2879, &u[u_dim1 + 1], ldu); - } else { - dlacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); - } - dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); - ktemp = ctot[1] + 2; - ctemp = ctot[2] + ctot[3]; - dgemm_("N", "N", nr, k, &ctemp, &c_b2865, &u2[nlp2 + ktemp * u2_dim1], - ldu2, &q[ktemp + q_dim1], ldq, &c_b2879, &u[nlp2 + u_dim1], ldu); - -/* Generate the right singular vectors. */ - -L100: - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); - q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; - i__2 = *k; - for (j = 2; j <= i__2; ++j) { - jc = idxc[j]; - q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; -/* L110: */ - } -/* L120: */ - } - -/* Update the right singular vector matrix. 
*/ - - if (*k == 2) { - dgemm_("N", "N", k, &m, k, &c_b2865, &q[q_offset], ldq, &vt2[ - vt2_offset], ldvt2, &c_b2879, &vt[vt_offset], ldvt); - return 0; - } - ktemp = ctot[1] + 1; - dgemm_("N", "N", k, &nlp1, &ktemp, &c_b2865, &q[q_dim1 + 1], ldq, &vt2[ - vt2_dim1 + 1], ldvt2, &c_b2879, &vt[vt_dim1 + 1], ldvt); - ktemp = ctot[1] + 2 + ctot[2]; - if (ktemp <= *ldvt2) { - dgemm_("N", "N", k, &nlp1, &ctot[3], &c_b2865, &q[ktemp * q_dim1 + 1], - ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b2865, &vt[vt_dim1 + - 1], ldvt); - } - - ktemp = ctot[1] + 1; - nrp1 = *nr + *sqre; - if (ktemp > 1) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; -/* L130: */ - } - i__1 = m; - for (i__ = nlp2; i__ <= i__1; ++i__) { - vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; -/* L140: */ - } - } - ctemp = ctot[2] + 1 + ctot[3]; - dgemm_("N", "N", k, &nrp1, &ctemp, &c_b2865, &q[ktemp * q_dim1 + 1], ldq, - &vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b2879, &vt[nlp2 * - vt_dim1 + 1], ldvt); - - return 0; - -/* End of DLASD3 */ - -} /* dlasd3_ */ - -/* Subroutine */ int dlasd4_(integer *n, integer *i__, doublereal *d__, - doublereal *z__, doublereal *delta, doublereal *rho, doublereal * - sigma, doublereal *work, integer *info) -{ - /* System generated locals */ - integer i__1; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal a, b, c__; - static integer j; - static doublereal w, dd[3]; - static integer ii; - static doublereal dw, zz[3]; - static integer ip1; - static doublereal eta, phi, eps, tau, psi; - static integer iim1, iip1; - static doublereal dphi, dpsi; - static integer iter; - static doublereal temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, - dtiip; - static integer niter; - static doublereal dtisq; - static logical swtch; - static doublereal dtnsq; - extern /* Subroutine */ int dlaed6_(integer *, logical *, doublereal *, - doublereal *, doublereal *, 
doublereal *, doublereal *, integer *) - , dlasd5_(integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *); - static doublereal delsq2, dtnsq1; - static logical swtch3; - - static logical orgati; - static doublereal erretm, dtipsq, rhoinv; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 - - - Purpose - ======= - - This subroutine computes the square root of the I-th updated - eigenvalue of a positive symmetric rank-one modification to - a positive diagonal matrix whose entries are given as the squares - of the corresponding entries in the array d, and that - - 0 <= D(i) < D(j) for i < j - - and that RHO > 0. This is arranged by the calling routine, and is - no loss in generality. The rank-one modified system is thus - - diag( D ) * diag( D ) + RHO * Z * Z_transpose. - - where we assume the Euclidean norm of Z is 1. - - The method consists of approximating the rational functions in the - secular equation by simpler interpolating rational functions. - - Arguments - ========= - - N (input) INTEGER - The length of all arrays. - - I (input) INTEGER - The index of the eigenvalue to be computed. 1 <= I <= N. - - D (input) DOUBLE PRECISION array, dimension ( N ) - The original eigenvalues. It is assumed that they are in - order, 0 <= D(I) < D(J) for I < J. - - Z (input) DOUBLE PRECISION array, dimension ( N ) - The components of the updating vector. - - DELTA (output) DOUBLE PRECISION array, dimension ( N ) - If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th - component. If N = 1, then DELTA(1) = 1. The vector DELTA - contains the information necessary to construct the - (singular) eigenvectors. - - RHO (input) DOUBLE PRECISION - The scalar in the symmetric updating formula. - - SIGMA (output) DOUBLE PRECISION - The computed lambda_I, the I-th updated eigenvalue. 
- - WORK (workspace) DOUBLE PRECISION array, dimension ( N ) - If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th - component. If N = 1, then WORK( 1 ) = 1. - - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = 1, the updating process failed. - - Internal Parameters - =================== - - Logical variable ORGATI (origin-at-i?) is used for distinguishing - whether D(i) or D(i+1) is treated as the origin. - - ORGATI = .true. origin at i - ORGATI = .false. origin at i+1 - - Logical variable SWTCH3 (switch-for-3-poles?) is for noting - if we are working with THREE poles! - - MAXIT is the maximum number of iterations allowed for each - eigenvalue. - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Since this routine is called in an inner loop, we do no argument - checking. - - Quick return for N=1 and 2. -*/ - - /* Parameter adjustments */ - --work; - --delta; - --z__; - --d__; - - /* Function Body */ - *info = 0; - if (*n == 1) { - -/* Presumably, I=1 upon entry */ - - *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]); - delta[1] = 1.; - work[1] = 1.; - return 0; - } - if (*n == 2) { - dlasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); - return 0; - } - -/* Compute machine epsilon */ - - eps = EPSILON; - rhoinv = 1. 
/ *rho; - -/* The case I = N */ - - if (*i__ == *n) { - -/* Initialize some basic variables */ - - ii = *n - 1; - niter = 1; - -/* Calculate initial guess */ - - temp = *rho / 2.; - -/* - If ||Z||_2 is not one, then TEMP should be set to - RHO * ||Z||_2^2 / TWO -*/ - - temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp)); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] = d__[j] + d__[*n] + temp1; - delta[j] = d__[j] - d__[*n] - temp1; -/* L10: */ - } - - psi = 0.; - i__1 = *n - 2; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / (delta[j] * work[j]); -/* L20: */ - } - - c__ = rhoinv + psi; - w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[* - n] / (delta[*n] * work[*n]); - - if (w <= 0.) { - temp1 = sqrt(d__[*n] * d__[*n] + *rho); - temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[* - n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * - z__[*n] / *rho; - -/* - The following TAU is to approximate - SIGMA_n^2 - D( N )*D( N ) -*/ - - if (c__ <= temp) { - tau = *rho; - } else { - delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); - a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[* - n]; - b = z__[*n] * z__[*n] * delsq; - if (a < 0.) { - tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); - } - } - -/* - It can be proved that - D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO -*/ - - } else { - delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); - a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; - b = z__[*n] * z__[*n] * delsq; - -/* - The following TAU is to approximate - SIGMA_n^2 - D( N )*D( N ) -*/ - - if (a < 0.) { - tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4. 
* c__)) / (c__ * 2.); - } - -/* - It can be proved that - D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 -*/ - - } - -/* The following ETA is to approximate SIGMA_n - D( N ) */ - - eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau)); - - *sigma = d__[*n] + eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - eta; - work[j] = d__[j] + d__[*i__] + eta; -/* L30: */ - } - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (delta[j] * work[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L40: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / (delta[*n] * work[*n]); - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi - + dphi); - - w = rhoinv + phi + psi; - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - goto L240; - } - -/* Calculate the new step */ - - ++niter; - dtnsq1 = work[*n - 1] * delta[*n - 1]; - dtnsq = work[*n] * delta[*n]; - c__ = w - dtnsq1 * dpsi - dtnsq * dphi; - a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi); - b = dtnsq * dtnsq1 * w; - if (c__ < 0.) { - c__ = abs(c__); - } - if (c__ == 0.) { - eta = *rho - *sigma * *sigma; - } else if (a >= 0.) { - eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ - * 2.); - } else { - eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) - ); - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta > 0.) 
{ - eta = -w / (dpsi + dphi); - } - temp = eta - dtnsq; - if (temp > *rho) { - eta = *rho + dtnsq; - } - - tau += eta; - eta /= *sigma + sqrt(eta + *sigma * *sigma); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; - work[j] += eta; -/* L50: */ - } - - *sigma += eta; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L60: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / (work[*n] * delta[*n]); - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi - + dphi); - - w = rhoinv + phi + psi; - -/* Main loop to update the values of the array DELTA */ - - iter = niter + 1; - - for (niter = iter; niter <= 20; ++niter) { - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - goto L240; - } - -/* Calculate the new step */ - - dtnsq1 = work[*n - 1] * delta[*n - 1]; - dtnsq = work[*n] * delta[*n]; - c__ = w - dtnsq1 * dpsi - dtnsq * dphi; - a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi); - b = dtnsq1 * dtnsq * w; - if (a >= 0.) { - eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } else { - eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta > 0.) { - eta = -w / (dpsi + dphi); - } - temp = eta - dtnsq; - if (temp <= 0.) 
{ - eta /= 2.; - } - - tau += eta; - eta /= *sigma + sqrt(eta + *sigma * *sigma); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; - work[j] += eta; -/* L70: */ - } - - *sigma += eta; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L80: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - temp = z__[*n] / (work[*n] * delta[*n]); - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( - dpsi + dphi); - - w = rhoinv + phi + psi; -/* L90: */ - } - -/* Return with INFO = 1, NITER = MAXIT and not converged */ - - *info = 1; - goto L240; - -/* End for the case I = N */ - - } else { - -/* The case for I < N */ - - niter = 1; - ip1 = *i__ + 1; - -/* Calculate initial guess */ - - delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]); - delsq2 = delsq / 2.; - temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2)); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] = d__[j] + d__[*i__] + temp; - delta[j] = d__[j] - d__[*i__] - temp; -/* L100: */ - } - - psi = 0.; - i__1 = *i__ - 1; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / (work[j] * delta[j]); -/* L110: */ - } - - phi = 0.; - i__1 = *i__ + 2; - for (j = *n; j >= i__1; --j) { - phi += z__[j] * z__[j] / (work[j] * delta[j]); -/* L120: */ - } - c__ = rhoinv + psi + phi; - w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[ - ip1] * z__[ip1] / (work[ip1] * delta[ip1]); - - if (w > 0.) { - -/* - d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 - - We choose d(i) as origin. -*/ - - orgati = TRUE_; - sg2lb = 0.; - sg2ub = delsq2; - a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; - b = z__[*i__] * z__[*i__] * delsq; - if (a > 0.) { - tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. 
* c__, abs( - d__1)))); - } else { - tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } - -/* - TAU now is an estimation of SIGMA^2 - D( I )^2. The - following, however, is the corresponding estimation of - SIGMA - D( I ). -*/ - - eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau)); - } else { - -/* - (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 - - We choose d(i+1) as origin. -*/ - - orgati = FALSE_; - sg2lb = -delsq2; - sg2ub = 0.; - a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; - b = z__[ip1] * z__[ip1] * delsq; - if (a < 0.) { - tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( - d__1)))); - } else { - tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / - (c__ * 2.); - } - -/* - TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The - following, however, is the corresponding estimation of - SIGMA - D( IP1 ). -*/ - - eta = tau / (d__[ip1] + sqrt((d__1 = d__[ip1] * d__[ip1] + tau, - abs(d__1)))); - } - - if (orgati) { - ii = *i__; - *sigma = d__[*i__] + eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] = d__[j] + d__[*i__] + eta; - delta[j] = d__[j] - d__[*i__] - eta; -/* L130: */ - } - } else { - ii = *i__ + 1; - *sigma = d__[ip1] + eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] = d__[j] + d__[ip1] + eta; - delta[j] = d__[j] - d__[ip1] - eta; -/* L140: */ - } - } - iim1 = ii - 1; - iip1 = ii + 1; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L150: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / (work[j] * delta[j]); - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L160: */ - } - - w = rhoinv + phi + psi; - -/* - W is the value of 
the secular function with - its ii-th element removed. -*/ - - swtch3 = FALSE_; - if (orgati) { - if (w < 0.) { - swtch3 = TRUE_; - } - } else { - if (w > 0.) { - swtch3 = TRUE_; - } - } - if ((ii == 1) || (ii == *n)) { - swtch3 = FALSE_; - } - - temp = z__[ii] / (work[ii] * delta[ii]); - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w += temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + - abs(tau) * dw; - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - goto L240; - } - - if (w <= 0.) { - sg2lb = max(sg2lb,tau); - } else { - sg2ub = min(sg2ub,tau); - } - -/* Calculate the new step */ - - ++niter; - if (! swtch3) { - dtipsq = work[ip1] * delta[ip1]; - dtisq = work[*i__] * delta[*i__]; - if (orgati) { -/* Computing 2nd power */ - d__1 = z__[*i__] / dtisq; - c__ = w - dtipsq * dw + delsq * (d__1 * d__1); - } else { -/* Computing 2nd power */ - d__1 = z__[ip1] / dtipsq; - c__ = w - dtisq * dw - delsq * (d__1 * d__1); - } - a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; - b = dtipsq * dtisq * w; - if (c__ == 0.) { - if (a == 0.) { - if (orgati) { - a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + - dphi); - } else { - a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi + - dphi); - } - } - eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. 
* c__, abs( - d__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - - dtiim = work[iim1] * delta[iim1]; - dtiip = work[iip1] * delta[iip1]; - temp = rhoinv + psi + phi; - if (orgati) { - temp1 = z__[iim1] / dtiim; - temp1 *= temp1; - c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * - (d__[iim1] + d__[iip1]) * temp1; - zz[0] = z__[iim1] * z__[iim1]; - if (dpsi < temp1) { - zz[2] = dtiip * dtiip * dphi; - } else { - zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); - } - } else { - temp1 = z__[iip1] / dtiip; - temp1 *= temp1; - c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * - (d__[iim1] + d__[iip1]) * temp1; - if (dphi < temp1) { - zz[0] = dtiim * dtiim * dpsi; - } else { - zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); - } - zz[2] = z__[iip1] * z__[iip1]; - } - zz[1] = z__[ii] * z__[ii]; - dd[0] = dtiim; - dd[1] = delta[ii] * work[ii]; - dd[2] = dtiip; - dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); - if (*info != 0) { - goto L240; - } - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.) { - eta = -w / dw; - } - if (orgati) { - temp1 = work[*i__] * delta[*i__]; - temp = eta - temp1; - } else { - temp1 = work[ip1] * delta[ip1]; - temp = eta - temp1; - } - if ((temp > sg2ub) || (temp < sg2lb)) { - if (w < 0.) 
{ - eta = (sg2ub - tau) / 2.; - } else { - eta = (sg2lb - tau) / 2.; - } - } - - tau += eta; - eta /= *sigma + sqrt(*sigma * *sigma + eta); - - prew = w; - - *sigma += eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] += eta; - delta[j] -= eta; -/* L170: */ - } - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L180: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / (work[j] * delta[j]); - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L190: */ - } - - temp = z__[ii] / (work[ii] * delta[ii]); - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + - abs(tau) * dw; - - if (w <= 0.) { - sg2lb = max(sg2lb,tau); - } else { - sg2ub = min(sg2ub,tau); - } - - swtch = FALSE_; - if (orgati) { - if (-w > abs(prew) / 10.) { - swtch = TRUE_; - } - } else { - if (w > abs(prew) / 10.) { - swtch = TRUE_; - } - } - -/* Main loop to update the values of the array DELTA and WORK */ - - iter = niter + 1; - - for (niter = iter; niter <= 20; ++niter) { - -/* Test for convergence */ - - if (abs(w) <= eps * erretm) { - goto L240; - } - -/* Calculate the new step */ - - if (! swtch3) { - dtipsq = work[ip1] * delta[ip1]; - dtisq = work[*i__] * delta[*i__]; - if (! 
swtch) { - if (orgati) { -/* Computing 2nd power */ - d__1 = z__[*i__] / dtisq; - c__ = w - dtipsq * dw + delsq * (d__1 * d__1); - } else { -/* Computing 2nd power */ - d__1 = z__[ip1] / dtipsq; - c__ = w - dtisq * dw - delsq * (d__1 * d__1); - } - } else { - temp = z__[ii] / (work[ii] * delta[ii]); - if (orgati) { - dpsi += temp * temp; - } else { - dphi += temp * temp; - } - c__ = w - dtisq * dpsi - dtipsq * dphi; - } - a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; - b = dtipsq * dtisq * w; - if (c__ == 0.) { - if (a == 0.) { - if (! swtch) { - if (orgati) { - a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * - (dpsi + dphi); - } else { - a = z__[ip1] * z__[ip1] + dtisq * dtisq * ( - dpsi + dphi); - } - } else { - a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi; - } - } - eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) - / (c__ * 2.); - } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, - abs(d__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - - dtiim = work[iim1] * delta[iim1]; - dtiip = work[iip1] * delta[iip1]; - temp = rhoinv + psi + phi; - if (swtch) { - c__ = temp - dtiim * dpsi - dtiip * dphi; - zz[0] = dtiim * dtiim * dpsi; - zz[2] = dtiip * dtiip * dphi; - } else { - if (orgati) { - temp1 = z__[iim1] / dtiim; - temp1 *= temp1; - temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[ - iip1]) * temp1; - c__ = temp - dtiip * (dpsi + dphi) - temp2; - zz[0] = z__[iim1] * z__[iim1]; - if (dpsi < temp1) { - zz[2] = dtiip * dtiip * dphi; - } else { - zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); - } - } else { - temp1 = z__[iip1] / dtiip; - temp1 *= temp1; - temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[ - iip1]) * temp1; - c__ = temp - dtiim * (dpsi + dphi) - temp2; - if (dphi < temp1) { - zz[0] = dtiim * dtiim * dpsi; - } else { - zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); - } - zz[2] = z__[iip1] * z__[iip1]; - } - } - dd[0] = dtiim; - dd[1] = delta[ii] * 
work[ii]; - dd[2] = dtiip; - dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); - if (*info != 0) { - goto L240; - } - } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.) { - eta = -w / dw; - } - if (orgati) { - temp1 = work[*i__] * delta[*i__]; - temp = eta - temp1; - } else { - temp1 = work[ip1] * delta[ip1]; - temp = eta - temp1; - } - if ((temp > sg2ub) || (temp < sg2lb)) { - if (w < 0.) { - eta = (sg2ub - tau) / 2.; - } else { - eta = (sg2lb - tau) / 2.; - } - } - - tau += eta; - eta /= *sigma + sqrt(*sigma * *sigma + eta); - - *sigma += eta; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - work[j] += eta; - delta[j] -= eta; -/* L200: */ - } - - prew = w; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / (work[j] * delta[j]); - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L210: */ - } - erretm = abs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / (work[j] * delta[j]); - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L220: */ - } - - temp = z__[ii] / (work[ii] * delta[ii]); - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. - + abs(tau) * dw; - if (w * prew > 0. && abs(w) > abs(prew) / 10.) { - swtch = ! swtch; - } - - if (w <= 0.) 
{ - sg2lb = max(sg2lb,tau); - } else { - sg2ub = min(sg2ub,tau); - } - -/* L230: */ - } - -/* Return with INFO = 1, NITER = MAXIT and not converged */ - - *info = 1; - - } - -L240: - return 0; - -/* End of DLASD4 */ - -} /* dlasd4_ */ - -/* Subroutine */ int dlasd5_(integer *i__, doublereal *d__, doublereal *z__, - doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * - work) -{ - /* System generated locals */ - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal b, c__, w, del, tau, delsq; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - This subroutine computes the square root of the I-th eigenvalue - of a positive symmetric rank-one modification of a 2-by-2 diagonal - matrix - - diag( D ) * diag( D ) + RHO * Z * transpose(Z) . - - The diagonal entries in the array D are assumed to satisfy - - 0 <= D(i) < D(j) for i < j . - - We also assume RHO > 0 and that the Euclidean norm of the vector - Z is one. - - Arguments - ========= - - I (input) INTEGER - The index of the eigenvalue to be computed. I = 1 or I = 2. - - D (input) DOUBLE PRECISION array, dimension ( 2 ) - The original eigenvalues. We assume 0 <= D(1) < D(2). - - Z (input) DOUBLE PRECISION array, dimension ( 2 ) - The components of the updating vector. - - DELTA (output) DOUBLE PRECISION array, dimension ( 2 ) - Contains (D(j) - lambda_I) in its j-th component. - The vector DELTA contains the information necessary - to construct the eigenvectors. - - RHO (input) DOUBLE PRECISION - The scalar in the symmetric updating formula. - - DSIGMA (output) DOUBLE PRECISION - The computed lambda_I, the I-th updated eigenvalue. - - WORK (workspace) DOUBLE PRECISION array, dimension ( 2 ) - WORK contains (D(j) + sigma_I) in its j-th component. 
- - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --work; - --delta; - --z__; - --d__; - - /* Function Body */ - del = d__[2] - d__[1]; - delsq = del * (d__[2] + d__[1]); - if (*i__ == 1) { - w = *rho * 4. * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.) - z__[1] * - z__[1] / (d__[1] * 3. + d__[2])) / del + 1.; - if (w > 0.) { - b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[1] * z__[1] * delsq; - -/* - B > ZERO, always - - The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) -*/ - - tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); - -/* The following TAU is DSIGMA - D( 1 ) */ - - tau /= d__[1] + sqrt(d__[1] * d__[1] + tau); - *dsigma = d__[1] + tau; - delta[1] = -tau; - delta[2] = del - tau; - work[1] = d__[1] * 2. + tau; - work[2] = d__[1] + tau + d__[2]; -/* - DELTA( 1 ) = -Z( 1 ) / TAU - DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) -*/ - } else { - b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * delsq; - -/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ - - if (b > 0.) { - tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); - } else { - tau = (b - sqrt(b * b + c__ * 4.)) / 2.; - } - -/* The following TAU is DSIGMA - D( 2 ) */ - - tau /= d__[2] + sqrt((d__1 = d__[2] * d__[2] + tau, abs(d__1))); - *dsigma = d__[2] + tau; - delta[1] = -(del + tau); - delta[2] = -tau; - work[1] = d__[1] + tau + d__[2]; - work[2] = d__[2] * 2. 
+ tau; -/* - DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) - DELTA( 2 ) = -Z( 2 ) / TAU -*/ - } -/* - TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) - DELTA( 1 ) = DELTA( 1 ) / TEMP - DELTA( 2 ) = DELTA( 2 ) / TEMP -*/ - } else { - -/* Now I=2 */ - - b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * delsq; - -/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ - - if (b > 0.) { - tau = (b + sqrt(b * b + c__ * 4.)) / 2.; - } else { - tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); - } - -/* The following TAU is DSIGMA - D( 2 ) */ - - tau /= d__[2] + sqrt(d__[2] * d__[2] + tau); - *dsigma = d__[2] + tau; - delta[1] = -(del + tau); - delta[2] = -tau; - work[1] = d__[1] + tau + d__[2]; - work[2] = d__[2] * 2. + tau; -/* - DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) - DELTA( 2 ) = -Z( 2 ) / TAU - TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) - DELTA( 1 ) = DELTA( 1 ) / TEMP - DELTA( 2 ) = DELTA( 2 ) / TEMP -*/ - } - return 0; - -/* End of DLASD5 */ - -} /* dlasd5_ */ - -/* Subroutine */ int dlasd6_(integer *icompq, integer *nl, integer *nr, - integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, - doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, - integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, - integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * - difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, - doublereal *work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, - poles_dim1, poles_offset, i__1; - doublereal d__1, d__2; - - /* Local variables */ - static integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dlasd7_(integer *, integer *, integer *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, 
doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, integer *, integer *, - integer *, integer *, integer *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), dlasd8_( - integer *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlamrg_(integer *, integer *, - doublereal *, integer *, integer *, integer *); - static integer isigma; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal orgnrm; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASD6 computes the SVD of an updated upper bidiagonal matrix B - obtained by merging two smaller ones by appending a row. This - routine is used only for the problem which requires all singular - values and optionally singular vector matrices in factored form. - B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. - A related subroutine, DLASD1, handles the case in which all singular - values and singular vectors of the bidiagonal matrix are desired. - - DLASD6 computes the SVD as follows: - - ( D1(in) 0 0 0 ) - B = U(in) * ( Z1' a Z2' b ) * VT(in) - ( 0 0 D2(in) 0 ) - - = U(out) * ( D(out) 0) * VT(out) - - where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M - with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros - elsewhere; and the entry b is empty if SQRE = 0. - - The singular values of B can be computed using D1, D2, the first - components of all the right singular vectors of the lower block, and - the last components of all the right singular vectors of the upper - block. 
These components are stored and updated in VF and VL, - respectively, in DLASD6. Hence U and VT are not explicitly - referenced. - - The singular values are stored in D. The algorithm consists of two - stages: - - The first stage consists of deflating the size of the problem - when there are multiple singular values or if there is a zero - in the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine DLASD7. - - The second stage consists of calculating the updated - singular values. This is done by finding the roots of the - secular equation via the routine DLASD4 (as called by DLASD8). - This routine also updates VF and VL and computes the distances - between the updated singular values and the old singular - values. - - DLASD6 is called from DLASDA. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed in - factored form: - = 0: Compute singular values only. - = 1: Compute singular vectors in factored form as well. - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has row dimension N = NL + NR + 1, - and column dimension M = N + SQRE. - - D (input/output) DOUBLE PRECISION array, dimension ( NL+NR+1 ). - On entry D(1:NL,1:NL) contains the singular values of the - upper block, and D(NL+2:N) contains the singular values - of the lower block. On exit D(1:N) contains the singular - values of the modified matrix. - - VF (input/output) DOUBLE PRECISION array, dimension ( M ) - On entry, VF(1:NL+1) contains the first components of all - right singular vectors of the upper block; and VF(NL+2:M) - contains the first components of all right singular vectors - of the lower block. 
On exit, VF contains the first components - of all right singular vectors of the bidiagonal matrix. - - VL (input/output) DOUBLE PRECISION array, dimension ( M ) - On entry, VL(1:NL+1) contains the last components of all - right singular vectors of the upper block; and VL(NL+2:M) - contains the last components of all right singular vectors of - the lower block. On exit, VL contains the last components of - all right singular vectors of the bidiagonal matrix. - - ALPHA (input) DOUBLE PRECISION - Contains the diagonal element associated with the added row. - - BETA (input) DOUBLE PRECISION - Contains the off-diagonal element associated with the added - row. - - IDXQ (output) INTEGER array, dimension ( N ) - This contains the permutation which will reintegrate the - subproblem just solved back into sorted order, i.e. - D( IDXQ( I = 1, N ) ) will be in ascending order. - - PERM (output) INTEGER array, dimension ( N ) - The permutations (from deflation and sorting) to be applied - to each block. Not referenced if ICOMPQ = 0. - - GIVPTR (output) INTEGER - The number of Givens rotations which took place in this - subproblem. Not referenced if ICOMPQ = 0. - - GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. Not referenced if ICOMPQ = 0. - - LDGCOL (input) INTEGER - leading dimension of GIVCOL, must be at least N. - - GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - Each number indicates the C or S value to be used in the - corresponding Givens rotation. Not referenced if ICOMPQ = 0. - - LDGNUM (input) INTEGER - The leading dimension of GIVNUM and POLES, must be at least N. - - POLES (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - On exit, POLES(1,*) is an array containing the new singular - values obtained from solving the secular equation, and - POLES(2,*) is an array containing the poles in the secular - equation. Not referenced if ICOMPQ = 0. 
- - DIFL (output) DOUBLE PRECISION array, dimension ( N ) - On exit, DIFL(I) is the distance between I-th updated - (undeflated) singular value and the I-th (undeflated) old - singular value. - - DIFR (output) DOUBLE PRECISION array, - dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and - dimension ( N ) if ICOMPQ = 0. - On exit, DIFR(I, 1) is the distance between I-th updated - (undeflated) singular value and the I+1-th (undeflated) old - singular value. - - If ICOMPQ = 1, DIFR(1:K,2) is an array containing the - normalizing factors for the right singular vector matrix. - - See DLASD8 for details on DIFL and DIFR. - - Z (output) DOUBLE PRECISION array, dimension ( M ) - The first elements of this array contain the components - of the deflation-adjusted updating row vector. - - K (output) INTEGER - Contains the dimension of the non-deflated matrix, - This is the order of the related secular equation. 1 <= K <=N. - - C (output) DOUBLE PRECISION - C contains garbage if SQRE =0 and the C-value of a Givens - rotation related to the right null space if SQRE = 1. - - S (output) DOUBLE PRECISION - S contains garbage if SQRE =0 and the S-value of a Givens - rotation related to the right null space if SQRE = 1. - - WORK (workspace) DOUBLE PRECISION array, dimension ( 4 * M ) - - IWORK (workspace) INTEGER array, dimension ( 3 * N ) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - --vf; - --vl; - --idxq; - --perm; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - poles_dim1 = *ldgnum; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - givnum_dim1 = *ldgnum; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - --difl; - --difr; - --z__; - --work; - --iwork; - - /* Function Body */ - *info = 0; - n = *nl + *nr + 1; - m = n + *sqre; - - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*nl < 1) { - *info = -2; - } else if (*nr < 1) { - *info = -3; - } else if ((*sqre < 0) || (*sqre > 1)) { - *info = -4; - } else if (*ldgcol < n) { - *info = -14; - } else if (*ldgnum < n) { - *info = -16; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD6", &i__1); - return 0; - } - -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in DLASD7 and DLASD8. -*/ - - isigma = 1; - iw = isigma + n; - ivfw = iw + m; - ivlw = ivfw + m; - - idx = 1; - idxc = idx + n; - idxp = idxc + n; - -/* - Scale. - - Computing MAX -*/ - d__1 = abs(*alpha), d__2 = abs(*beta); - orgnrm = max(d__1,d__2); - d__[*nl + 1] = 0.; - i__1 = n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { - orgnrm = (d__1 = d__[i__], abs(d__1)); - } -/* L10: */ - } - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, &n, &c__1, &d__[1], &n, - info); - *alpha /= orgnrm; - *beta /= orgnrm; - -/* Sort and Deflate singular values. */ - - dlasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & - work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], & - iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[ - givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, - info); - -/* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. 
*/ - - dlasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], - ldgnum, &work[isigma], &work[iw], info); - -/* Save the poles if ICOMPQ = 1. */ - - if (*icompq == 1) { - dcopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); - dcopy_(k, &work[isigma], &c__1, &poles[((poles_dim1) << (1)) + 1], & - c__1); - } - -/* Unscale. */ - - dlascl_("G", &c__0, &c__0, &c_b2865, &orgnrm, &n, &c__1, &d__[1], &n, - info); - -/* Prepare the IDXQ sorting permutation. */ - - n1 = *k; - n2 = n - *k; - dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); - - return 0; - -/* End of DLASD6 */ - -} /* dlasd6_ */ - -/* Subroutine */ int dlasd7_(integer *icompq, integer *nl, integer *nr, - integer *sqre, integer *k, doublereal *d__, doublereal *z__, - doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, - doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * - dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, - integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, - integer *ldgnum, doublereal *c__, doublereal *s, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1; - doublereal d__1, d__2; - - /* Local variables */ - static integer i__, j, m, n, k2; - static doublereal z1; - static integer jp; - static doublereal eps, tau, tol; - static integer nlp1, nlp2, idxi, idxj; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer idxjp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer jprev; - - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *); - static doublereal hlftol; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. 
of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASD7 merges the two sets of singular values together into a single - sorted set. Then it tries to deflate the size of the problem. There - are two ways in which deflation can occur: when two or more singular - values are close together or if there is a tiny entry in the Z - vector. For each such occurrence the order of the related - secular equation problem is reduced by one. - - DLASD7 is called from DLASD6. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed - in compact form, as follows: - = 0: Compute singular values only. - = 1: Compute singular vectors of upper - bidiagonal matrix in compact form. - - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. - - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - - The bidiagonal matrix has - N = NL + NR + 1 rows and - M = N + SQRE >= N columns. - - K (output) INTEGER - Contains the dimension of the non-deflated matrix, this is - the order of the related secular equation. 1 <= K <=N. - - D (input/output) DOUBLE PRECISION array, dimension ( N ) - On entry D contains the singular values of the two submatrices - to be combined. On exit D contains the trailing (N-K) updated - singular values (those which were deflated) sorted into - increasing order. - - Z (output) DOUBLE PRECISION array, dimension ( M ) - On exit Z contains the updating row vector in the secular - equation. - - ZW (workspace) DOUBLE PRECISION array, dimension ( M ) - Workspace for Z. 
- - VF (input/output) DOUBLE PRECISION array, dimension ( M ) - On entry, VF(1:NL+1) contains the first components of all - right singular vectors of the upper block; and VF(NL+2:M) - contains the first components of all right singular vectors - of the lower block. On exit, VF contains the first components - of all right singular vectors of the bidiagonal matrix. - - VFW (workspace) DOUBLE PRECISION array, dimension ( M ) - Workspace for VF. - - VL (input/output) DOUBLE PRECISION array, dimension ( M ) - On entry, VL(1:NL+1) contains the last components of all - right singular vectors of the upper block; and VL(NL+2:M) - contains the last components of all right singular vectors - of the lower block. On exit, VL contains the last components - of all right singular vectors of the bidiagonal matrix. - - VLW (workspace) DOUBLE PRECISION array, dimension ( M ) - Workspace for VL. - - ALPHA (input) DOUBLE PRECISION - Contains the diagonal element associated with the added row. - - BETA (input) DOUBLE PRECISION - Contains the off-diagonal element associated with the added - row. - - DSIGMA (output) DOUBLE PRECISION array, dimension ( N ) - Contains a copy of the diagonal elements (K-1 singular values - and one zero) in the secular equation. - - IDX (workspace) INTEGER array, dimension ( N ) - This will contain the permutation used to sort the contents of - D into ascending order. - - IDXP (workspace) INTEGER array, dimension ( N ) - This will contain the permutation used to place deflated - values of D at the end of the array. On output IDXP(2:K) - points to the nondeflated D-values and IDXP(K+1:N) - points to the deflated singular values. - - IDXQ (input) INTEGER array, dimension ( N ) - This contains the permutation which separately sorts the two - sub-problems in D into ascending order. Note that entries in - the first half of this permutation must first be moved one - position backward; and entries in the second half - must first have NL+1 added to their values. 
- - PERM (output) INTEGER array, dimension ( N ) - The permutations (from deflation and sorting) to be applied - to each singular block. Not referenced if ICOMPQ = 0. - - GIVPTR (output) INTEGER - The number of Givens rotations which took place in this - subproblem. Not referenced if ICOMPQ = 0. - - GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. Not referenced if ICOMPQ = 0. - - LDGCOL (input) INTEGER - The leading dimension of GIVCOL, must be at least N. - - GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - Each number indicates the C or S value to be used in the - corresponding Givens rotation. Not referenced if ICOMPQ = 0. - - LDGNUM (input) INTEGER - The leading dimension of GIVNUM, must be at least N. - - C (output) DOUBLE PRECISION - C contains garbage if SQRE =0 and the C-value of a Givens - rotation related to the right null space if SQRE = 1. - - S (output) DOUBLE PRECISION - S contains garbage if SQRE =0 and the S-value of a Givens - rotation related to the right null space if SQRE = 1. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - --z__; - --zw; - --vf; - --vfw; - --vl; - --vlw; - --dsigma; - --idx; - --idxp; - --idxq; - --perm; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - givnum_dim1 = *ldgnum; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - - /* Function Body */ - *info = 0; - n = *nl + *nr + 1; - m = n + *sqre; - - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*nl < 1) { - *info = -2; - } else if (*nr < 1) { - *info = -3; - } else if ((*sqre < 0) || (*sqre > 1)) { - *info = -4; - } else if (*ldgcol < n) { - *info = -22; - } else if (*ldgnum < n) { - *info = -24; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD7", &i__1); - return 0; - } - - nlp1 = *nl + 1; - nlp2 = *nl + 2; - if (*icompq == 1) { - *givptr = 0; - } - -/* - Generate the first part of the vector Z and move the singular - values in the first part of D one position backward. -*/ - - z1 = *alpha * vl[nlp1]; - vl[nlp1] = 0.; - tau = vf[nlp1]; - for (i__ = *nl; i__ >= 1; --i__) { - z__[i__ + 1] = *alpha * vl[i__]; - vl[i__] = 0.; - vf[i__ + 1] = vf[i__]; - d__[i__ + 1] = d__[i__]; - idxq[i__ + 1] = idxq[i__] + 1; -/* L10: */ - } - vf[1] = tau; - -/* Generate the second part of the vector Z. */ - - i__1 = m; - for (i__ = nlp2; i__ <= i__1; ++i__) { - z__[i__] = *beta * vf[i__]; - vf[i__] = 0.; -/* L20: */ - } - -/* Sort the singular values into increasing order */ - - i__1 = n; - for (i__ = nlp2; i__ <= i__1; ++i__) { - idxq[i__] += nlp1; -/* L30: */ - } - -/* DSIGMA, IDXC, IDXC, and ZW are used as storage space. 
*/ - - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - dsigma[i__] = d__[idxq[i__]]; - zw[i__] = z__[idxq[i__]]; - vfw[i__] = vf[idxq[i__]]; - vlw[i__] = vl[idxq[i__]]; -/* L40: */ - } - - dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); - - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - idxi = idx[i__] + 1; - d__[i__] = dsigma[idxi]; - z__[i__] = zw[idxi]; - vf[i__] = vfw[idxi]; - vl[i__] = vlw[idxi]; -/* L50: */ - } - -/* Calculate the allowable deflation tolerence */ - - eps = EPSILON; -/* Computing MAX */ - d__1 = abs(*alpha), d__2 = abs(*beta); - tol = max(d__1,d__2); -/* Computing MAX */ - d__2 = (d__1 = d__[n], abs(d__1)); - tol = eps * 64. * max(d__2,tol); - -/* - There are 2 kinds of deflation -- first a value in the z-vector - is small, second two (or more) singular values are very close - together (their difference is small). - - If the value in the z-vector is small, we simply permute the - array so that the corresponding singular value is moved to the - end. - - If two values in the D-vector are close, we perform a two-sided - rotation designed to make one of the corresponding z-vector - entries zero, and then permute the array so that the deflated - singular value is moved to the end. - - If there are multiple singular values then the problem deflates. - Here the number of equal singular values are found. As each equal - singular value is found, an elementary reflector is computed to - rotate the corresponding singular subspace so that the - corresponding components of Z are zero in this new basis. -*/ - - *k = 1; - k2 = n + 1; - i__1 = n; - for (j = 2; j <= i__1; ++j) { - if ((d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ - - --k2; - idxp[k2] = j; - if (j == n) { - goto L100; - } - } else { - jprev = j; - goto L70; - } -/* L60: */ - } -L70: - j = jprev; -L80: - ++j; - if (j > n) { - goto L90; - } - if ((d__1 = z__[j], abs(d__1)) <= tol) { - -/* Deflate due to small z component. 
*/ - - --k2; - idxp[k2] = j; - } else { - -/* Check if singular values are close enough to allow deflation. */ - - if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { - -/* Deflation is possible. */ - - *s = z__[jprev]; - *c__ = z__[j]; - -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ - - tau = dlapy2_(c__, s); - z__[j] = tau; - z__[jprev] = 0.; - *c__ /= tau; - *s = -(*s) / tau; - -/* Record the appropriate Givens rotation */ - - if (*icompq == 1) { - ++(*givptr); - idxjp = idxq[idx[jprev] + 1]; - idxj = idxq[idx[j] + 1]; - if (idxjp <= nlp1) { - --idxjp; - } - if (idxj <= nlp1) { - --idxj; - } - givcol[*givptr + ((givcol_dim1) << (1))] = idxjp; - givcol[*givptr + givcol_dim1] = idxj; - givnum[*givptr + ((givnum_dim1) << (1))] = *c__; - givnum[*givptr + givnum_dim1] = *s; - } - drot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); - drot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); - --k2; - idxp[k2] = jprev; - jprev = j; - } else { - ++(*k); - zw[*k] = z__[jprev]; - dsigma[*k] = d__[jprev]; - idxp[*k] = jprev; - jprev = j; - } - } - goto L80; -L90: - -/* Record the last singular value. */ - - ++(*k); - zw[*k] = z__[jprev]; - dsigma[*k] = d__[jprev]; - idxp[*k] = jprev; - -L100: - -/* - Sort the singular values into DSIGMA. The singular values which - were not deflated go into the first K slots of DSIGMA, except - that DSIGMA(1) is treated separately. -*/ - - i__1 = n; - for (j = 2; j <= i__1; ++j) { - jp = idxp[j]; - dsigma[j] = d__[jp]; - vfw[j] = vf[jp]; - vlw[j] = vl[jp]; -/* L110: */ - } - if (*icompq == 1) { - i__1 = n; - for (j = 2; j <= i__1; ++j) { - jp = idxp[j]; - perm[j] = idxq[idx[jp] + 1]; - if (perm[j] <= nlp1) { - --perm[j]; - } -/* L120: */ - } - } - -/* - The deflated singular values go back into the last N - K slots of - D. -*/ - - i__1 = n - *k; - dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); - -/* - Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and - VL(M). 
-*/ - - dsigma[1] = 0.; - hlftol = tol / 2.; - if (abs(dsigma[2]) <= hlftol) { - dsigma[2] = hlftol; - } - if (m > n) { - z__[1] = dlapy2_(&z1, &z__[m]); - if (z__[1] <= tol) { - *c__ = 1.; - *s = 0.; - z__[1] = tol; - } else { - *c__ = z1 / z__[1]; - *s = -z__[m] / z__[1]; - } - drot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); - drot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); - } else { - if (abs(z1) <= tol) { - z__[1] = tol; - } else { - z__[1] = z1; - } - } - -/* Restore Z, VF, and VL. */ - - i__1 = *k - 1; - dcopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); - i__1 = n - 1; - dcopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); - i__1 = n - 1; - dcopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); - - return 0; - -/* End of DLASD7 */ - -} /* dlasd7_ */ - -/* Subroutine */ int dlasd8_(integer *icompq, integer *k, doublereal *d__, - doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, - doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * - work, integer *info) -{ - /* System generated locals */ - integer difr_dim1, difr_offset, i__1, i__2; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static integer i__, j; - static doublereal dj, rho; - static integer iwk1, iwk2, iwk3; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - static doublereal temp; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - static integer iwk2i, iwk3i; - static doublereal diflj, difrj, dsigj; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *), dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, 
integer *), dlaset_(char *, integer *, integer - *, doublereal *, doublereal *, doublereal *, integer *), - xerbla_(char *, integer *); - static doublereal dsigjp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASD8 finds the square roots of the roots of the secular equation, - as defined by the values in DSIGMA and Z. It makes the appropriate - calls to DLASD4, and stores, for each element in D, the distance - to its two nearest poles (elements in DSIGMA). It also updates - the arrays VF and VL, the first and last components of all the - right singular vectors of the original bidiagonal matrix. - - DLASD8 is called from DLASD6. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed in - factored form in the calling routine: - = 0: Compute singular values only. - = 1: Compute singular vectors in factored form as well. - - K (input) INTEGER - The number of terms in the rational function to be solved - by DLASD4. K >= 1. - - D (output) DOUBLE PRECISION array, dimension ( K ) - On output, D contains the updated singular values. - - Z (input) DOUBLE PRECISION array, dimension ( K ) - The first K elements of this array contain the components - of the deflation-adjusted updating row vector. - - VF (input/output) DOUBLE PRECISION array, dimension ( K ) - On entry, VF contains information passed through DBEDE8. - On exit, VF contains the first K components of the first - components of all right singular vectors of the bidiagonal - matrix. - - VL (input/output) DOUBLE PRECISION array, dimension ( K ) - On entry, VL contains information passed through DBEDE8. - On exit, VL contains the first K components of the last - components of all right singular vectors of the bidiagonal - matrix. 
- - DIFL (output) DOUBLE PRECISION array, dimension ( K ) - On exit, DIFL(I) = D(I) - DSIGMA(I). - - DIFR (output) DOUBLE PRECISION array, - dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and - dimension ( K ) if ICOMPQ = 0. - On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not - defined and will not be referenced. - - If ICOMPQ = 1, DIFR(1:K,2) is an array containing the - normalizing factors for the right singular vector matrix. - - LDDIFR (input) INTEGER - The leading dimension of DIFR, must be at least K. - - DSIGMA (input) DOUBLE PRECISION array, dimension ( K ) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. - - WORK (workspace) DOUBLE PRECISION array, dimension at least 3 * K - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --z__; - --vf; - --vl; - --difl; - difr_dim1 = *lddifr; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - --dsigma; - --work; - - /* Function Body */ - *info = 0; - - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*k < 1) { - *info = -2; - } else if (*lddifr < *k) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASD8", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*k == 1) { - d__[1] = abs(z__[1]); - difl[1] = d__[1]; - if (*icompq == 1) { - difl[2] = 1.; - difr[((difr_dim1) << (1)) + 1] = 1.; - } - return 0; - } - -/* - Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can - be computed with high relative accuracy (barring over/underflow). 
- This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). - The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), - which on any of these machines zeros out the bottommost - bit of DSIGMA(I) if it is 1; this makes the subsequent - subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DSIGMA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DSIGMA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. -*/ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; -/* L10: */ - } - -/* Book keeping. */ - - iwk1 = 1; - iwk2 = iwk1 + *k; - iwk3 = iwk2 + *k; - iwk2i = iwk2 - 1; - iwk3i = iwk3 - 1; - -/* Normalize Z. */ - - rho = dnrm2_(k, &z__[1], &c__1); - dlascl_("G", &c__0, &c__0, &rho, &c_b2865, k, &c__1, &z__[1], k, info); - rho *= rho; - -/* Initialize WORK(IWK3). */ - - dlaset_("A", k, &c__1, &c_b2865, &c_b2865, &work[iwk3], k); - -/* - Compute the updated singular values, the arrays DIFL, DIFR, - and the updated Z. -*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ - iwk2], info); - -/* If the root finder fails, the computation is terminated. 
*/ - - if (*info != 0) { - return 0; - } - work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j]; - difl[j] = -work[j]; - difr[j + difr_dim1] = -work[j + 1]; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + - i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ - j]); -/* L20: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + - i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ - j]); -/* L30: */ - } -/* L40: */ - } - -/* Compute updated Z. */ - - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - d__2 = sqrt((d__1 = work[iwk3i + i__], abs(d__1))); - z__[i__] = d_sign(&d__2, &z__[i__]); -/* L50: */ - } - -/* Update VF and VL. */ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - diflj = difl[j]; - dj = d__[j]; - dsigj = -dsigma[j]; - if (j < *k) { - difrj = -difr[j + difr_dim1]; - dsigjp = -dsigma[j + 1]; - } - work[j] = -z__[j] / diflj / (dsigma[j] + dj); - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigj) - diflj) / ( - dsigma[i__] + dj); -/* L60: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigjp) + difrj) / - (dsigma[i__] + dj); -/* L70: */ - } - temp = dnrm2_(k, &work[1], &c__1); - work[iwk2i + j] = ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; - work[iwk3i + j] = ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; - if (*icompq == 1) { - difr[j + ((difr_dim1) << (1))] = temp; - } -/* L80: */ - } - - dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); - dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); - - return 0; - -/* End of DLASD8 */ - -} /* dlasd8_ */ - -/* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n, - integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer - *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, - 
doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, - integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, - doublereal *s, doublereal *work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, - difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, - poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, - z_dim1, z_offset, i__1, i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - - /* Local variables */ - static integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, - nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; - static doublereal beta; - static integer idxq, nlvl; - static doublereal alpha; - static integer inode, ndiml, ndimr, idxqi, itemp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer sqrei; - extern /* Subroutine */ int dlasd6_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, integer *, integer *); - static integer nwork1, nwork2; - extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer - *, integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *), dlasdt_(integer *, integer *, - integer *, integer *, integer *, integer *, integer *), dlaset_( - char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *), xerbla_(char *, integer *); - static integer smlszp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - Using a divide and conquer approach, DLASDA computes the singular - value decomposition (SVD) of a real upper bidiagonal N-by-M matrix - B with diagonal D and offdiagonal E, where M = N + SQRE. The - algorithm computes the singular values in the SVD B = U * S * VT. - The orthogonal matrices U and VT are optionally computed in - compact form. - - A related subroutine, DLASD0, computes the singular values and - the singular vectors in explicit form. - - Arguments - ========= - - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed - in compact form, as follows - = 0: Compute singular values only. - = 1: Compute singular vectors of upper bidiagonal - matrix in compact form. - - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. - - N (input) INTEGER - The row dimension of the upper bidiagonal matrix. This is - also the dimension of the main diagonal array D. - - SQRE (input) INTEGER - Specifies the column dimension of the bidiagonal matrix. - = 0: The bidiagonal matrix has column dimension M = N; - = 1: The bidiagonal matrix has column dimension M = N + 1. - - D (input/output) DOUBLE PRECISION array, dimension ( N ) - On entry D contains the main diagonal of the bidiagonal - matrix. On exit D, if INFO = 0, contains its singular values. - - E (input) DOUBLE PRECISION array, dimension ( M-1 ) - Contains the subdiagonal entries of the bidiagonal matrix. - On exit, E has been destroyed. - - U (output) DOUBLE PRECISION array, - dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced - if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left - singular vector matrices of all subproblems at the bottom - level. - - LDU (input) INTEGER, LDU = > N. - The leading dimension of arrays U, VT, DIFL, DIFR, POLES, - GIVNUM, and Z. 
- - VT (output) DOUBLE PRECISION array, - dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced - if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right - singular vector matrices of all subproblems at the bottom - level. - - K (output) INTEGER array, - dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. - If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th - secular equation on the computation tree. - - DIFL (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), - where NLVL = floor(log_2 (N/SMLSIZ))). - - DIFR (output) DOUBLE PRECISION array, - dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and - dimension ( N ) if ICOMPQ = 0. - If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) - record distances between singular values on the I-th - level and singular values on the (I -1)-th level, and - DIFR(1:N, 2 * I ) contains the normalizing factors for - the right singular vector matrix. See DLASD8 for details. - - Z (output) DOUBLE PRECISION array, - dimension ( LDU, NLVL ) if ICOMPQ = 1 and - dimension ( N ) if ICOMPQ = 0. - The first K elements of Z(1, I) contain the components of - the deflation-adjusted updating row vector for subproblems - on the I-th level. - - POLES (output) DOUBLE PRECISION array, - dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced - if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and - POLES(1, 2*I) contain the new and old singular values - involved in the secular equations on the I-th level. - - GIVPTR (output) INTEGER array, - dimension ( N ) if ICOMPQ = 1, and not referenced if - ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records - the number of Givens rotations performed on the I-th - problem on the computation tree. - - GIVCOL (output) INTEGER array, - dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not - referenced if ICOMPQ = 0. 
If ICOMPQ = 1, on exit, for each I, - GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations - of Givens rotations performed on the I-th level on the - computation tree. - - LDGCOL (input) INTEGER, LDGCOL = > N. - The leading dimension of arrays GIVCOL and PERM. - - PERM (output) INTEGER array, - dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced - if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records - permutations done on the I-th level of the computation tree. - - GIVNUM (output) DOUBLE PRECISION array, - dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not - referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, - GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- - values of Givens rotations performed on the I-th level on - the computation tree. - - C (output) DOUBLE PRECISION array, - dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. - If ICOMPQ = 1 and the I-th subproblem is not square, on exit, - C( I ) contains the C-value of a Givens rotation related to - the right null space of the I-th subproblem. - - S (output) DOUBLE PRECISION array, dimension ( N ) if - ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 - and the I-th subproblem is not square, on exit, S( I ) - contains the S-value of a Givens rotation related to - the right null space of the I-th subproblem. - - WORK (workspace) DOUBLE PRECISION array, dimension - (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). - - IWORK (workspace) INTEGER array. - Dimension must be at least (7 * N). - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - --e; - givnum_dim1 = *ldu; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - poles_dim1 = *ldu; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - z_dim1 = *ldu; - z_offset = 1 + z_dim1; - z__ -= z_offset; - difr_dim1 = *ldu; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - difl_dim1 = *ldu; - difl_offset = 1 + difl_dim1; - difl -= difl_offset; - vt_dim1 = *ldu; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - --k; - --givptr; - perm_dim1 = *ldgcol; - perm_offset = 1 + perm_dim1; - perm -= perm_offset; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - --c__; - --s; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*smlsiz < 3) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if ((*sqre < 0) || (*sqre > 1)) { - *info = -4; - } else if (*ldu < *n + *sqre) { - *info = -8; - } else if (*ldgcol < *n) { - *info = -17; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASDA", &i__1); - return 0; - } - - m = *n + *sqre; - -/* If the input matrix is too small, call DLASDQ to find the SVD. */ - - if (*n <= *smlsiz) { - if (*icompq == 0) { - dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ - vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & - work[1], info); - } else { - dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] - , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], - info); - } - return 0; - } - -/* Book-keeping and set up the computation tree. 
*/ - - inode = 1; - ndiml = inode + *n; - ndimr = ndiml + *n; - idxq = ndimr + *n; - iwk = idxq + *n; - - ncc = 0; - nru = 0; - - smlszp = *smlsiz + 1; - vf = 1; - vl = vf + m; - nwork1 = vl + m; - nwork2 = nwork1 + smlszp * smlszp; - - dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], - smlsiz); - -/* - for the nodes on bottom level of the tree, solve - their subproblems by DLASDQ. -*/ - - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - -/* - IC : center row of each node - NL : number of rows of left subproblem - NR : number of rows of right subproblem - NLF: starting row of the left subproblem - NRF: starting row of the right subproblem -*/ - - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nlp1 = nl + 1; - nr = iwork[ndimr + i1]; - nlf = ic - nl; - nrf = ic + 1; - idxqi = idxq + nlf - 2; - vfi = vf + nlf - 1; - vli = vl + nlf - 1; - sqrei = 1; - if (*icompq == 0) { - dlaset_("A", &nlp1, &nlp1, &c_b2879, &c_b2865, &work[nwork1], & - smlszp); - dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & - work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], - &nl, &work[nwork2], info); - itemp = nwork1 + nl * smlszp; - dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); - dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); - } else { - dlaset_("A", &nl, &nl, &c_b2879, &c_b2865, &u[nlf + u_dim1], ldu); - dlaset_("A", &nlp1, &nlp1, &c_b2879, &c_b2865, &vt[nlf + vt_dim1], - ldu); - dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & - vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf + - u_dim1], ldu, &work[nwork1], info); - dcopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); - dcopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) - ; - } - if (*info != 0) { - return 0; - } - i__2 = nl; - for (j = 1; j <= i__2; ++j) { - iwork[idxqi + j] = j; -/* L10: */ - } - if (i__ == nd && *sqre == 0) { - sqrei = 0; - } else { - sqrei = 1; - } - idxqi += 
nlp1; - vfi += nlp1; - vli += nlp1; - nrp1 = nr + sqrei; - if (*icompq == 0) { - dlaset_("A", &nrp1, &nrp1, &c_b2879, &c_b2865, &work[nwork1], & - smlszp); - dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & - work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], - &nr, &work[nwork2], info); - itemp = nwork1 + (nrp1 - 1) * smlszp; - dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); - dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); - } else { - dlaset_("A", &nr, &nr, &c_b2879, &c_b2865, &u[nrf + u_dim1], ldu); - dlaset_("A", &nrp1, &nrp1, &c_b2879, &c_b2865, &vt[nrf + vt_dim1], - ldu); - dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & - vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf + - u_dim1], ldu, &work[nwork1], info); - dcopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); - dcopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) - ; - } - if (*info != 0) { - return 0; - } - i__2 = nr; - for (j = 1; j <= i__2; ++j) { - iwork[idxqi + j] = j; -/* L20: */ - } -/* L30: */ - } - -/* Now conquer each subproblem bottom-up. */ - - j = pow_ii(&c__2, &nlvl); - for (lvl = nlvl; lvl >= 1; --lvl) { - lvl2 = ((lvl) << (1)) - 1; - -/* - Find the first node LF and last node LL on - the current level LVL. 
-*/ - - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); - ll = ((lf) << (1)) - 1; - } - i__1 = ll; - for (i__ = lf; i__ <= i__1; ++i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - if (i__ == ll) { - sqrei = *sqre; - } else { - sqrei = 1; - } - vfi = vf + nlf - 1; - vli = vl + nlf - 1; - idxqi = idxq + nlf - 1; - alpha = d__[ic]; - beta = e[ic]; - if (*icompq == 0) { - dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & - work[vli], &alpha, &beta, &iwork[idxqi], &perm[ - perm_offset], &givptr[1], &givcol[givcol_offset], - ldgcol, &givnum[givnum_offset], ldu, &poles[ - poles_offset], &difl[difl_offset], &difr[difr_offset], - &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], - &iwork[iwk], info); - } else { - --j; - dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & - work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf + - lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 * - givcol_dim1], ldgcol, &givnum[nlf + lvl2 * - givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], & - difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 * - difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j], - &s[j], &work[nwork1], &iwork[iwk], info); - } - if (*info != 0) { - return 0; - } -/* L40: */ - } -/* L50: */ - } - - return 0; - -/* End of DLASDA */ - -} /* dlasda_ */ - -/* Subroutine */ int dlasdq_(char *uplo, integer *sqre, integer *n, integer * - ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, - doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, - doublereal *c__, integer *ldc, doublereal *work, integer *info) -{ - /* System generated locals */ - integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2; - - /* Local variables */ - static integer i__, j; - static doublereal r__, cs, sn; - static integer np1, isub; - static doublereal smin; - static integer sqre1; - extern logical lsame_(char 
*, char *); - extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer * - , doublereal *, integer *); - static integer iuplo; - extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *), xerbla_(char *, - integer *), dbdsqr_(char *, integer *, integer *, integer - *, integer *, doublereal *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *); - static logical rotate; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASDQ computes the singular value decomposition (SVD) of a real - (upper or lower) bidiagonal matrix with diagonal D and offdiagonal - E, accumulating the transformations if desired. Letting B denote - the input bidiagonal matrix, the algorithm computes orthogonal - matrices Q and P such that B = Q * S * P' (P' denotes the transpose - of P). The singular values S are overwritten on D. - - The input matrix U is changed to U * Q if desired. - The input matrix VT is changed to P' * VT if desired. - The input matrix C is changed to Q' * C if desired. - - See "Computing Small Singular Values of Bidiagonal Matrices With - Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, - LAPACK Working Note #3, for a detailed description of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - On entry, UPLO specifies whether the input bidiagonal matrix - is upper or lower bidiagonal, and wether it is square are - not. - UPLO = 'U' or 'u' B is upper bidiagonal. - UPLO = 'L' or 'l' B is lower bidiagonal. - - SQRE (input) INTEGER - = 0: then the input matrix is N-by-N. 
- = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and - (N+1)-by-N if UPLU = 'L'. - - The bidiagonal matrix has - N = NL + NR + 1 rows and - M = N + SQRE >= N columns. - - N (input) INTEGER - On entry, N specifies the number of rows and columns - in the matrix. N must be at least 0. - - NCVT (input) INTEGER - On entry, NCVT specifies the number of columns of - the matrix VT. NCVT must be at least 0. - - NRU (input) INTEGER - On entry, NRU specifies the number of rows of - the matrix U. NRU must be at least 0. - - NCC (input) INTEGER - On entry, NCC specifies the number of columns of - the matrix C. NCC must be at least 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, D contains the diagonal entries of the - bidiagonal matrix whose SVD is desired. On normal exit, - D contains the singular values in ascending order. - - E (input/output) DOUBLE PRECISION array. - dimension is (N-1) if SQRE = 0 and N if SQRE = 1. - On entry, the entries of E contain the offdiagonal entries - of the bidiagonal matrix whose SVD is desired. On normal - exit, E will contain 0. If the algorithm does not converge, - D and E will contain the diagonal and superdiagonal entries - of a bidiagonal matrix orthogonally equivalent to the one - given as input. - - VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) - On entry, contains a matrix which on exit has been - premultiplied by P', dimension N-by-NCVT if SQRE = 0 - and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). - - LDVT (input) INTEGER - On entry, LDVT specifies the leading dimension of VT as - declared in the calling (sub) program. LDVT must be at - least 1. If NCVT is nonzero LDVT must also be at least N. - - U (input/output) DOUBLE PRECISION array, dimension (LDU, N) - On entry, contains a matrix which on exit has been - postmultiplied by Q, dimension NRU-by-N if SQRE = 0 - and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). 
- - LDU (input) INTEGER - On entry, LDU specifies the leading dimension of U as - declared in the calling (sub) program. LDU must be at - least max( 1, NRU ) . - - C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) - On entry, contains an N-by-NCC matrix which on exit - has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 - and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). - - LDC (input) INTEGER - On entry, LDC specifies the leading dimension of C as - declared in the calling (sub) program. LDC must be at - least 1. If NCC is nonzero, LDC must also be at least N. - - WORK (workspace) DOUBLE PRECISION array, dimension (4*N) - Workspace. Only referenced if one of NCVT, NRU, or NCC is - nonzero, and if N is at least 2. - - INFO (output) INTEGER - On exit, a value of 0 indicates a successful exit. - If INFO < 0, argument number -INFO is illegal. - If INFO > 0, the algorithm did not converge, and INFO - specifies how many superdiagonals did not converge. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - --e; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - iuplo = 0; - if (lsame_(uplo, "U")) { - iuplo = 1; - } - if (lsame_(uplo, "L")) { - iuplo = 2; - } - if (iuplo == 0) { - *info = -1; - } else if ((*sqre < 0) || (*sqre > 1)) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ncvt < 0) { - *info = -4; - } else if (*nru < 0) { - *info = -5; - } else if (*ncc < 0) { - *info = -6; - } else if ((*ncvt == 0 && *ldvt < 1) || (*ncvt > 0 && *ldvt < max(1,*n))) - { - *info = -10; - } else if (*ldu < max(1,*nru)) { - *info = -12; - } else if ((*ncc == 0 && *ldc < 1) || (*ncc > 0 && *ldc < max(1,*n))) { - *info = -14; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASDQ", &i__1); - return 0; - } - if (*n == 0) { - return 0; - } - -/* ROTATE is true if any singular vectors desired, false otherwise */ - - rotate = ((*ncvt > 0) || (*nru > 0)) || (*ncc > 0); - np1 = *n + 1; - sqre1 = *sqre; - -/* - If matrix non-square upper bidiagonal, rotate to be lower - bidiagonal. The rotations are on the right. -*/ - - if (iuplo == 1 && sqre1 == 1) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (rotate) { - work[i__] = cs; - work[*n + i__] = sn; - } -/* L10: */ - } - dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); - d__[*n] = r__; - e[*n] = 0.; - if (rotate) { - work[*n] = cs; - work[*n + *n] = sn; - } - iuplo = 2; - sqre1 = 0; - -/* Update singular vectors if desired. */ - - if (*ncvt > 0) { - dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ - vt_offset], ldvt); - } - } - -/* - If matrix lower bidiagonal, rotate to be upper bidiagonal - by applying Givens rotations on the left. 
-*/ - - if (iuplo == 2) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (rotate) { - work[i__] = cs; - work[*n + i__] = sn; - } -/* L20: */ - } - -/* - If matrix (N+1)-by-N lower bidiagonal, one additional - rotation is needed. -*/ - - if (sqre1 == 1) { - dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); - d__[*n] = r__; - if (rotate) { - work[*n] = cs; - work[*n + *n] = sn; - } - } - -/* Update singular vectors if desired. */ - - if (*nru > 0) { - if (sqre1 == 0) { - dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ - u_offset], ldu); - } else { - dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ - u_offset], ldu); - } - } - if (*ncc > 0) { - if (sqre1 == 0) { - dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ - c_offset], ldc); - } else { - dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ - c_offset], ldc); - } - } - } - -/* - Call DBDSQR to compute the SVD of the reduced real - N-by-N upper bidiagonal matrix. -*/ - - dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ - u_offset], ldu, &c__[c_offset], ldc, &work[1], info); - -/* - Sort the singular values into ascending order (insertion sort on - singular values, but only one transposition per singular vector) -*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Scan for smallest D(I). */ - - isub = i__; - smin = d__[i__]; - i__2 = *n; - for (j = i__ + 1; j <= i__2; ++j) { - if (d__[j] < smin) { - isub = j; - smin = d__[j]; - } -/* L30: */ - } - if (isub != i__) { - -/* Swap singular values and vectors. 
*/ - - d__[isub] = d__[i__]; - d__[i__] = smin; - if (*ncvt > 0) { - dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], - ldvt); - } - if (*nru > 0) { - dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] - , &c__1); - } - if (*ncc > 0) { - dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) - ; - } - } -/* L40: */ - } - - return 0; - -/* End of DLASDQ */ - -} /* dlasdq_ */ - -/* Subroutine */ int dlasdt_(integer *n, integer *lvl, integer *nd, integer * - inode, integer *ndiml, integer *ndimr, integer *msub) -{ - /* System generated locals */ - integer i__1, i__2; - - /* Builtin functions */ - double log(doublereal); - - /* Local variables */ - static integer i__, il, ir, maxn; - static doublereal temp; - static integer nlvl, llst, ncrnt; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASDT creates a tree of subproblems for bidiagonal divide and - conquer. - - Arguments - ========= - - N (input) INTEGER - On entry, the number of diagonal elements of the - bidiagonal matrix. - - LVL (output) INTEGER - On exit, the number of levels on the computation tree. - - ND (output) INTEGER - On exit, the number of nodes on the tree. - - INODE (output) INTEGER array, dimension ( N ) - On exit, centers of subproblems. - - NDIML (output) INTEGER array, dimension ( N ) - On exit, row dimensions of left children. - - NDIMR (output) INTEGER array, dimension ( N ) - On exit, row dimensions of right children. - - MSUB (input) INTEGER. - On entry, the maximum row dimension each subproblem at the - bottom of the tree can be of. 
- - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Find the number of levels on the tree. -*/ - - /* Parameter adjustments */ - --ndimr; - --ndiml; - --inode; - - /* Function Body */ - maxn = max(1,*n); - temp = log((doublereal) maxn / (doublereal) (*msub + 1)) / log(2.); - *lvl = (integer) temp + 1; - - i__ = *n / 2; - inode[1] = i__ + 1; - ndiml[1] = i__; - ndimr[1] = *n - i__ - 1; - il = 0; - ir = 1; - llst = 1; - i__1 = *lvl - 1; - for (nlvl = 1; nlvl <= i__1; ++nlvl) { - -/* - Constructing the tree at (NLVL+1)-st level. The number of - nodes created on this level is LLST * 2. -*/ - - i__2 = llst - 1; - for (i__ = 0; i__ <= i__2; ++i__) { - il += 2; - ir += 2; - ncrnt = llst + i__; - ndiml[il] = ndiml[ncrnt] / 2; - ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1; - inode[il] = inode[ncrnt] - ndimr[il] - 1; - ndiml[ir] = ndimr[ncrnt] / 2; - ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1; - inode[ir] = inode[ncrnt] + ndiml[ir] + 1; -/* L10: */ - } - llst <<= 1; -/* L20: */ - } - *nd = ((llst) << (1)) - 1; - - return 0; - -/* End of DLASDT */ - -} /* dlasdt_ */ - -/* Subroutine */ int dlaset_(char *uplo, integer *m, integer *n, doublereal * - alpha, doublereal *beta, doublereal *a, integer *lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLASET initializes an m-by-n matrix A to BETA on the diagonal and - ALPHA on the offdiagonals. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be set. 
- = 'U': Upper triangular part is set; the strictly lower - triangular part of A is not changed. - = 'L': Lower triangular part is set; the strictly upper - triangular part of A is not changed. - Otherwise: All of the matrix A is set. - - M (input) INTEGER - The number of rows of the matrix A. M >= 0. - - N (input) INTEGER - The number of columns of the matrix A. N >= 0. - - ALPHA (input) DOUBLE PRECISION - The constant to which the offdiagonal elements are to be set. - - BETA (input) DOUBLE PRECISION - The constant to which the diagonal elements are to be set. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On exit, the leading m-by-n submatrix of A is set as follows: - - if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, - if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, - otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, - - and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - if (lsame_(uplo, "U")) { - -/* - Set the strictly upper triangular or trapezoidal part of the - array to ALPHA. -*/ - - i__1 = *n; - for (j = 2; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = j - 1; - i__2 = min(i__3,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = *alpha; -/* L10: */ - } -/* L20: */ - } - - } else if (lsame_(uplo, "L")) { - -/* - Set the strictly lower triangular or trapezoidal part of the - array to ALPHA. -*/ - - i__1 = min(*m,*n); - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = *alpha; -/* L30: */ - } -/* L40: */ - } - - } else { - -/* Set the leading m-by-n submatrix to ALPHA. 
*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = *alpha; -/* L50: */ - } -/* L60: */ - } - } - -/* Set the first min(M,N) diagonal elements to BETA. */ - - i__1 = min(*m,*n); - for (i__ = 1; i__ <= i__1; ++i__) { - a[i__ + i__ * a_dim1] = *beta; -/* L70: */ - } - - return 0; - -/* End of DLASET */ - -} /* dlaset_ */ - -/* Subroutine */ int dlasq1_(integer *n, doublereal *d__, doublereal *e, - doublereal *work, integer *info) -{ - /* System generated locals */ - integer i__1, i__2; - doublereal d__1, d__2, d__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__; - static doublereal eps; - extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal - *, doublereal *, doublereal *); - static doublereal scale; - static integer iinfo; - static doublereal sigmn; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static doublereal sigmx; - extern /* Subroutine */ int dlasq2_(integer *, doublereal *, integer *); - - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *); - static doublereal safmin; - extern /* Subroutine */ int xerbla_(char *, integer *), dlasrt_( - char *, integer *, doublereal *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASQ1 computes the singular values of a real N-by-N bidiagonal - matrix with diagonal D and off-diagonal E. The singular values - are computed to high relative accuracy, in the absence of - denormalization, underflow and overflow. The algorithm was first - presented in - - "Accurate singular values and differential qd algorithms" by K. V. - Fernando and B. N. 
Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, - 1994, - - and the present implementation is described in "An implementation of - the dqds Algorithm (Positive Case)", LAPACK Working Note. - - Arguments - ========= - - N (input) INTEGER - The number of rows and columns in the matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, D contains the diagonal elements of the - bidiagonal matrix whose SVD is desired. On normal exit, - D contains the singular values in decreasing order. - - E (input/output) DOUBLE PRECISION array, dimension (N) - On entry, elements E(1:N-1) contain the off-diagonal elements - of the bidiagonal matrix whose SVD is desired. - On exit, E is overwritten. - - WORK (workspace) DOUBLE PRECISION array, dimension (4*N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm failed - = 1, a split was marked by a positive value in E - = 2, current block of Z not diagonalized after 30*N - iterations (in inner while loop) - = 3, termination criterion of outer while loop not met - (program created more than N unreduced blocks) - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --work; - --e; - --d__; - - /* Function Body */ - *info = 0; - if (*n < 0) { - *info = -2; - i__1 = -(*info); - xerbla_("DLASQ1", &i__1); - return 0; - } else if (*n == 0) { - return 0; - } else if (*n == 1) { - d__[1] = abs(d__[1]); - return 0; - } else if (*n == 2) { - dlas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); - d__[1] = sigmx; - d__[2] = sigmn; - return 0; - } - -/* Estimate the largest singular value. 
*/ - - sigmx = 0.; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = (d__1 = d__[i__], abs(d__1)); -/* Computing MAX */ - d__2 = sigmx, d__3 = (d__1 = e[i__], abs(d__1)); - sigmx = max(d__2,d__3); -/* L10: */ - } - d__[*n] = (d__1 = d__[*n], abs(d__1)); - -/* Early return if SIGMX is zero (matrix is already diagonal). */ - - if (sigmx == 0.) { - dlasrt_("D", n, &d__[1], &iinfo); - return 0; - } - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = sigmx, d__2 = d__[i__]; - sigmx = max(d__1,d__2); -/* L20: */ - } - -/* - Copy D and E into WORK (in the Z format) and scale (squaring the - input data makes scaling by a power of the radix pointless). -*/ - - eps = PRECISION; - safmin = SAFEMINIMUM; - scale = sqrt(eps / safmin); - dcopy_(n, &d__[1], &c__1, &work[1], &c__2); - i__1 = *n - 1; - dcopy_(&i__1, &e[1], &c__1, &work[2], &c__2); - i__1 = ((*n) << (1)) - 1; - i__2 = ((*n) << (1)) - 1; - dlascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, - &iinfo); - -/* Compute the q's and e's. 
*/ - - i__1 = ((*n) << (1)) - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing 2nd power */ - d__1 = work[i__]; - work[i__] = d__1 * d__1; -/* L30: */ - } - work[*n * 2] = 0.; - - dlasq2_(n, &work[1], info); - - if (*info == 0) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = sqrt(work[i__]); -/* L40: */ - } - dlascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & - iinfo); - } - - return 0; - -/* End of DLASQ1 */ - -} /* dlasq1_ */ - -/* Subroutine */ int dlasq2_(integer *n, doublereal *z__, integer *info) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal d__, e; - static integer k; - static doublereal s, t; - static integer i0, i4, n0, pp; - static doublereal eps, tol; - static integer ipn4; - static doublereal tol2; - static logical ieee; - static integer nbig; - static doublereal dmin__, emin, emax; - static integer ndiv, iter; - static doublereal qmin, temp, qmax, zmax; - static integer splt, nfail; - static doublereal desig, trace, sigma; - static integer iinfo; - extern /* Subroutine */ int dlasq3_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - integer *, integer *, integer *, logical *); - - static integer iwhila, iwhilb; - static doublereal oldemn, safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASQ2 computes all the eigenvalues of the symmetric positive - definite tridiagonal matrix associated with the qd array Z to high - relative accuracy are computed to high relative accuracy, in the - absence of denormalization, underflow and overflow. - - To see the relation of Z to the tridiagonal matrix, let L be a - unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and - let U be an upper bidiagonal matrix with 1's above and diagonal - Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the - symmetric tridiagonal to which it is similar. - - Note : DLASQ2 defines a logical variable, IEEE, which is true - on machines which follow ieee-754 floating-point standard in their - handling of infinities and NaNs, and false otherwise. This variable - is passed to DLASQ3. - - Arguments - ========= - - N (input) INTEGER - The number of rows and columns in the matrix. N >= 0. - - Z (workspace) DOUBLE PRECISION array, dimension ( 4*N ) - On entry Z holds the qd array. On exit, entries 1 to N hold - the eigenvalues in decreasing order, Z( 2*N+1 ) holds the - trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If - N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) - holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of - shifts that failed. 
- - INFO (output) INTEGER - = 0: successful exit - < 0: if the i-th argument is a scalar and had an illegal - value, then INFO = -i, if the i-th argument is an - array and the j-entry had an illegal value, then - INFO = -(i*100+j) - > 0: the algorithm failed - = 1, a split was marked by a positive value in E - = 2, current block of Z not diagonalized after 30*N - iterations (in inner while loop) - = 3, termination criterion of outer while loop not met - (program created more than N unreduced blocks) - - Further Details - =============== - Local Variables: I0:N0 defines a current unreduced segment of Z. - The shifts are accumulated in SIGMA. Iteration count is in ITER. - Ping-pong is controlled by PP (alternates between 0 and 1). - - ===================================================================== - - - Test the input arguments. - (in case DLASQ2 is not called by DLASQ1) -*/ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - *info = 0; - eps = PRECISION; - safmin = SAFEMINIMUM; - tol = eps * 100.; -/* Computing 2nd power */ - d__1 = tol; - tol2 = d__1 * d__1; - - if (*n < 0) { - *info = -1; - xerbla_("DLASQ2", &c__1); - return 0; - } else if (*n == 0) { - return 0; - } else if (*n == 1) { - -/* 1-by-1 case. */ - - if (z__[1] < 0.) { - *info = -201; - xerbla_("DLASQ2", &c__2); - } - return 0; - } else if (*n == 2) { - -/* 2-by-2 case. */ - - if ((z__[2] < 0.) || (z__[3] < 0.)) { - *info = -2; - xerbla_("DLASQ2", &c__2); - return 0; - } else if (z__[3] > z__[1]) { - d__ = z__[3]; - z__[3] = z__[1]; - z__[1] = d__; - } - z__[5] = z__[1] + z__[2] + z__[3]; - if (z__[2] > z__[3] * tol2) { - t = (z__[1] - z__[3] + z__[2]) * .5; - s = z__[3] * (z__[2] / t); - if (s <= t) { - s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) 
+ 1.))); - } else { - s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); - } - t = z__[1] + (s + z__[2]); - z__[3] *= z__[1] / t; - z__[1] = t; - } - z__[2] = z__[3]; - z__[6] = z__[2] + z__[1]; - return 0; - } - -/* Check for negative data and compute sums of q's and e's. */ - - z__[*n * 2] = 0.; - emin = z__[2]; - qmax = 0.; - zmax = 0.; - d__ = 0.; - e = 0.; - - i__1 = (*n - 1) << (1); - for (k = 1; k <= i__1; k += 2) { - if (z__[k] < 0.) { - *info = -(k + 200); - xerbla_("DLASQ2", &c__2); - return 0; - } else if (z__[k + 1] < 0.) { - *info = -(k + 201); - xerbla_("DLASQ2", &c__2); - return 0; - } - d__ += z__[k]; - e += z__[k + 1]; -/* Computing MAX */ - d__1 = qmax, d__2 = z__[k]; - qmax = max(d__1,d__2); -/* Computing MIN */ - d__1 = emin, d__2 = z__[k + 1]; - emin = min(d__1,d__2); -/* Computing MAX */ - d__1 = max(qmax,zmax), d__2 = z__[k + 1]; - zmax = max(d__1,d__2); -/* L10: */ - } - if (z__[((*n) << (1)) - 1] < 0.) { - *info = -(((*n) << (1)) + 199); - xerbla_("DLASQ2", &c__2); - return 0; - } - d__ += z__[((*n) << (1)) - 1]; -/* Computing MAX */ - d__1 = qmax, d__2 = z__[((*n) << (1)) - 1]; - qmax = max(d__1,d__2); - zmax = max(qmax,zmax); - -/* Check for diagonality. */ - - if (e == 0.) { - i__1 = *n; - for (k = 2; k <= i__1; ++k) { - z__[k] = z__[((k) << (1)) - 1]; -/* L20: */ - } - dlasrt_("D", n, &z__[1], &iinfo); - z__[((*n) << (1)) - 1] = d__; - return 0; - } - - trace = d__ + e; - -/* Check for zero data. */ - - if (trace == 0.) { - z__[((*n) << (1)) - 1] = 0.; - return 0; - } - -/* Check whether the machine is IEEE conformable. */ - - ieee = ilaenv_(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen) - 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "DLASQ2", "N", &c__1, &c__2, - &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1; - -/* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). 
*/ - - for (k = (*n) << (1); k >= 2; k += -2) { - z__[k * 2] = 0.; - z__[((k) << (1)) - 1] = z__[k]; - z__[((k) << (1)) - 2] = 0.; - z__[((k) << (1)) - 3] = z__[k - 1]; -/* L30: */ - } - - i0 = 1; - n0 = *n; - -/* Reverse the qd-array, if warranted. */ - - if (z__[((i0) << (2)) - 3] * 1.5 < z__[((n0) << (2)) - 3]) { - ipn4 = (i0 + n0) << (2); - i__1 = (i0 + n0 - 1) << (1); - for (i4 = (i0) << (2); i4 <= i__1; i4 += 4) { - temp = z__[i4 - 3]; - z__[i4 - 3] = z__[ipn4 - i4 - 3]; - z__[ipn4 - i4 - 3] = temp; - temp = z__[i4 - 1]; - z__[i4 - 1] = z__[ipn4 - i4 - 5]; - z__[ipn4 - i4 - 5] = temp; -/* L40: */ - } - } - -/* Initial split checking via dqd and Li's test. */ - - pp = 0; - - for (k = 1; k <= 2; ++k) { - - d__ = z__[((n0) << (2)) + pp - 3]; - i__1 = ((i0) << (2)) + pp; - for (i4 = ((n0 - 1) << (2)) + pp; i4 >= i__1; i4 += -4) { - if (z__[i4 - 1] <= tol2 * d__) { - z__[i4 - 1] = -0.; - d__ = z__[i4 - 3]; - } else { - d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); - } -/* L50: */ - } - -/* dqd maps Z to ZZ plus Li's test. */ - - emin = z__[((i0) << (2)) + pp + 1]; - d__ = z__[((i0) << (2)) + pp - 3]; - i__1 = ((n0 - 1) << (2)) + pp; - for (i4 = ((i0) << (2)) + pp; i4 <= i__1; i4 += 4) { - z__[i4 - ((pp) << (1)) - 2] = d__ + z__[i4 - 1]; - if (z__[i4 - 1] <= tol2 * d__) { - z__[i4 - 1] = -0.; - z__[i4 - ((pp) << (1)) - 2] = d__; - z__[i4 - ((pp) << (1))] = 0.; - d__ = z__[i4 + 1]; - } else if (safmin * z__[i4 + 1] < z__[i4 - ((pp) << (1)) - 2] && - safmin * z__[i4 - ((pp) << (1)) - 2] < z__[i4 + 1]) { - temp = z__[i4 + 1] / z__[i4 - ((pp) << (1)) - 2]; - z__[i4 - ((pp) << (1))] = z__[i4 - 1] * temp; - d__ *= temp; - } else { - z__[i4 - ((pp) << (1))] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - - ((pp) << (1)) - 2]); - d__ = z__[i4 + 1] * (d__ / z__[i4 - ((pp) << (1)) - 2]); - } -/* Computing MIN */ - d__1 = emin, d__2 = z__[i4 - ((pp) << (1))]; - emin = min(d__1,d__2); -/* L60: */ - } - z__[((n0) << (2)) - pp - 2] = d__; - -/* Now find qmax. 
*/ - - qmax = z__[((i0) << (2)) - pp - 2]; - i__1 = ((n0) << (2)) - pp - 2; - for (i4 = ((i0) << (2)) - pp + 2; i4 <= i__1; i4 += 4) { -/* Computing MAX */ - d__1 = qmax, d__2 = z__[i4]; - qmax = max(d__1,d__2); -/* L70: */ - } - -/* Prepare for the next iteration on K. */ - - pp = 1 - pp; -/* L80: */ - } - - iter = 2; - nfail = 0; - ndiv = (n0 - i0) << (1); - - i__1 = *n + 1; - for (iwhila = 1; iwhila <= i__1; ++iwhila) { - if (n0 < 1) { - goto L150; - } - -/* - While array unfinished do - - E(N0) holds the value of SIGMA when submatrix in I0:N0 - splits from the rest of the array, but is negated. -*/ - - desig = 0.; - if (n0 == *n) { - sigma = 0.; - } else { - sigma = -z__[((n0) << (2)) - 1]; - } - if (sigma < 0.) { - *info = 1; - return 0; - } - -/* - Find last unreduced submatrix's top index I0, find QMAX and - EMIN. Find Gershgorin-type bound if Q's much greater than E's. -*/ - - emax = 0.; - if (n0 > i0) { - emin = (d__1 = z__[((n0) << (2)) - 5], abs(d__1)); - } else { - emin = 0.; - } - qmin = z__[((n0) << (2)) - 3]; - qmax = qmin; - for (i4 = (n0) << (2); i4 >= 8; i4 += -4) { - if (z__[i4 - 5] <= 0.) { - goto L100; - } - if (qmin >= emax * 4.) { -/* Computing MIN */ - d__1 = qmin, d__2 = z__[i4 - 3]; - qmin = min(d__1,d__2); -/* Computing MAX */ - d__1 = emax, d__2 = z__[i4 - 5]; - emax = max(d__1,d__2); - } -/* Computing MAX */ - d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5]; - qmax = max(d__1,d__2); -/* Computing MIN */ - d__1 = emin, d__2 = z__[i4 - 5]; - emin = min(d__1,d__2); -/* L90: */ - } - i4 = 4; - -L100: - i0 = i4 / 4; - -/* Store EMIN for passing to DLASQ3. */ - - z__[((n0) << (2)) - 1] = emin; - -/* - Put -(initial shift) into DMIN. - - Computing MAX -*/ - d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax); - dmin__ = -max(d__1,d__2); - -/* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. 
*/ - - pp = 0; - - nbig = (n0 - i0 + 1) * 30; - i__2 = nbig; - for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { - if (i0 > n0) { - goto L130; - } - -/* While submatrix unfinished take a good dqds step. */ - - dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & - nfail, &iter, &ndiv, &ieee); - - pp = 1 - pp; - -/* When EMIN is very small check for splits. */ - - if (pp == 0 && n0 - i0 >= 3) { - if ((z__[n0 * 4] <= tol2 * qmax) || (z__[((n0) << (2)) - 1] <= - tol2 * sigma)) { - splt = i0 - 1; - qmax = z__[((i0) << (2)) - 3]; - emin = z__[((i0) << (2)) - 1]; - oldemn = z__[i0 * 4]; - i__3 = (n0 - 3) << (2); - for (i4 = (i0) << (2); i4 <= i__3; i4 += 4) { - if ((z__[i4] <= tol2 * z__[i4 - 3]) || (z__[i4 - 1] <= - tol2 * sigma)) { - z__[i4 - 1] = -sigma; - splt = i4 / 4; - qmax = 0.; - emin = z__[i4 + 3]; - oldemn = z__[i4 + 4]; - } else { -/* Computing MAX */ - d__1 = qmax, d__2 = z__[i4 + 1]; - qmax = max(d__1,d__2); -/* Computing MIN */ - d__1 = emin, d__2 = z__[i4 - 1]; - emin = min(d__1,d__2); -/* Computing MIN */ - d__1 = oldemn, d__2 = z__[i4]; - oldemn = min(d__1,d__2); - } -/* L110: */ - } - z__[((n0) << (2)) - 1] = emin; - z__[n0 * 4] = oldemn; - i0 = splt + 1; - } - } - -/* L120: */ - } - - *info = 2; - return 0; - -/* end IWHILB */ - -L130: - -/* L140: */ - ; - } - - *info = 3; - return 0; - -/* end IWHILA */ - -L150: - -/* Move q's to the front. */ - - i__1 = *n; - for (k = 2; k <= i__1; ++k) { - z__[k] = z__[((k) << (2)) - 3]; -/* L160: */ - } - -/* Sort and compute sum of eigenvalues. */ - - dlasrt_("D", n, &z__[1], &iinfo); - - e = 0.; - for (k = *n; k >= 1; --k) { - e += z__[k]; -/* L170: */ - } - -/* Store trace, sum(eigenvalues) and information on performance. */ - - z__[((*n) << (1)) + 1] = trace; - z__[((*n) << (1)) + 2] = e; - z__[((*n) << (1)) + 3] = (doublereal) iter; -/* Computing 2nd power */ - i__1 = *n; - z__[((*n) << (1)) + 4] = (doublereal) ndiv / (doublereal) (i__1 * i__1); - z__[((*n) << (1)) + 5] = nfail * 100. 
/ (doublereal) iter; - return 0; - -/* End of DLASQ2 */ - -} /* dlasq2_ */ - -/* Subroutine */ int dlasq3_(integer *i0, integer *n0, doublereal *z__, - integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, - doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, - logical *ieee) -{ - /* Initialized data */ - - static integer ttype = 0; - static doublereal dmin1 = 0.; - static doublereal dmin2 = 0.; - static doublereal dn = 0.; - static doublereal dn1 = 0.; - static doublereal dn2 = 0.; - static doublereal tau = 0.; - - /* System generated locals */ - integer i__1; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal s, t; - static integer j4, nn; - static doublereal eps, tol; - static integer n0in, ipn4; - static doublereal tol2, temp; - extern /* Subroutine */ int dlasq4_(integer *, integer *, doublereal *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *) - , dlasq5_(integer *, integer *, doublereal *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, logical *), dlasq6_( - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *); - - static doublereal safmin; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - May 17, 2000 - - - Purpose - ======= - - DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. - In case of failure it changes shifts, and tries again until output - is positive. - - Arguments - ========= - - I0 (input) INTEGER - First index. - - N0 (input) INTEGER - Last index. - - Z (input) DOUBLE PRECISION array, dimension ( 4*N ) - Z holds the qd array. 
- - PP (input) INTEGER - PP=0 for ping, PP=1 for pong. - - DMIN (output) DOUBLE PRECISION - Minimum value of d. - - SIGMA (output) DOUBLE PRECISION - Sum of shifts used in current segment. - - DESIG (input/output) DOUBLE PRECISION - Lower order part of SIGMA - - QMAX (input) DOUBLE PRECISION - Maximum value of q. - - NFAIL (output) INTEGER - Number of times shift was too big. - - ITER (output) INTEGER - Number of iterations. - - NDIV (output) INTEGER - Number of divisions. - - TTYPE (output) INTEGER - Shift type. - - IEEE (input) LOGICAL - Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). - - ===================================================================== -*/ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - - n0in = *n0; - eps = PRECISION; - safmin = SAFEMINIMUM; - tol = eps * 100.; -/* Computing 2nd power */ - d__1 = tol; - tol2 = d__1 * d__1; - -/* Check for deflation. */ - -L10: - - if (*n0 < *i0) { - return 0; - } - if (*n0 == *i0) { - goto L20; - } - nn = ((*n0) << (2)) + *pp; - if (*n0 == *i0 + 1) { - goto L40; - } - -/* Check whether E(N0-1) is negligible, 1 eigenvalue. */ - - if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - ((*pp) << (1)) - - 4] > tol2 * z__[nn - 7]) { - goto L30; - } - -L20: - - z__[((*n0) << (2)) - 3] = z__[((*n0) << (2)) + *pp - 3] + *sigma; - --(*n0); - goto L10; - -/* Check whether E(N0-2) is negligible, 2 eigenvalues. */ - -L30: - - if (z__[nn - 9] > tol2 * *sigma && z__[nn - ((*pp) << (1)) - 8] > tol2 * - z__[nn - 11]) { - goto L50; - } - -L40: - - if (z__[nn - 3] > z__[nn - 7]) { - s = z__[nn - 3]; - z__[nn - 3] = z__[nn - 7]; - z__[nn - 7] = s; - } - if (z__[nn - 5] > z__[nn - 3] * tol2) { - t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5; - s = z__[nn - 3] * (z__[nn - 5] / t); - if (s <= t) { - s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) 
+ 1.))); - } else { - s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); - } - t = z__[nn - 7] + (s + z__[nn - 5]); - z__[nn - 3] *= z__[nn - 7] / t; - z__[nn - 7] = t; - } - z__[((*n0) << (2)) - 7] = z__[nn - 7] + *sigma; - z__[((*n0) << (2)) - 3] = z__[nn - 3] + *sigma; - *n0 += -2; - goto L10; - -L50: - -/* Reverse the qd-array, if warranted. */ - - if ((*dmin__ <= 0.) || (*n0 < n0in)) { - if (z__[((*i0) << (2)) + *pp - 3] * 1.5 < z__[((*n0) << (2)) + *pp - - 3]) { - ipn4 = (*i0 + *n0) << (2); - i__1 = (*i0 + *n0 - 1) << (1); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { - temp = z__[j4 - 3]; - z__[j4 - 3] = z__[ipn4 - j4 - 3]; - z__[ipn4 - j4 - 3] = temp; - temp = z__[j4 - 2]; - z__[j4 - 2] = z__[ipn4 - j4 - 2]; - z__[ipn4 - j4 - 2] = temp; - temp = z__[j4 - 1]; - z__[j4 - 1] = z__[ipn4 - j4 - 5]; - z__[ipn4 - j4 - 5] = temp; - temp = z__[j4]; - z__[j4] = z__[ipn4 - j4 - 4]; - z__[ipn4 - j4 - 4] = temp; -/* L60: */ - } - if (*n0 - *i0 <= 4) { - z__[((*n0) << (2)) + *pp - 1] = z__[((*i0) << (2)) + *pp - 1]; - z__[((*n0) << (2)) - *pp] = z__[((*i0) << (2)) - *pp]; - } -/* Computing MIN */ - d__1 = dmin2, d__2 = z__[((*n0) << (2)) + *pp - 1]; - dmin2 = min(d__1,d__2); -/* Computing MIN */ - d__1 = z__[((*n0) << (2)) + *pp - 1], d__2 = z__[((*i0) << (2)) + - *pp - 1], d__1 = min(d__1,d__2), d__2 = z__[((*i0) << (2)) - + *pp + 3]; - z__[((*n0) << (2)) + *pp - 1] = min(d__1,d__2); -/* Computing MIN */ - d__1 = z__[((*n0) << (2)) - *pp], d__2 = z__[((*i0) << (2)) - *pp] - , d__1 = min(d__1,d__2), d__2 = z__[((*i0) << (2)) - *pp - + 4]; - z__[((*n0) << (2)) - *pp] = min(d__1,d__2); -/* Computing MAX */ - d__1 = *qmax, d__2 = z__[((*i0) << (2)) + *pp - 3], d__1 = max( - d__1,d__2), d__2 = z__[((*i0) << (2)) + *pp + 1]; - *qmax = max(d__1,d__2); - *dmin__ = -0.; - } - } - -/* - L70: - - Computing MIN -*/ - d__1 = z__[((*n0) << (2)) + *pp - 1], d__2 = z__[((*n0) << (2)) + *pp - 9] - , d__1 = min(d__1,d__2), d__2 = dmin2 + z__[((*n0) << (2)) - *pp]; - if 
((*dmin__ < 0.) || (safmin * *qmax < min(d__1,d__2))) { - -/* Choose a shift. */ - - dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1, - &dn2, &tau, &ttype); - -/* Call dqds until DMIN > 0. */ - -L80: - - dlasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1, - &dn2, ieee); - - *ndiv += *n0 - *i0 + 2; - ++(*iter); - -/* Check status. */ - - if (*dmin__ >= 0. && dmin1 > 0.) { - -/* Success. */ - - goto L100; - - } else if (*dmin__ < 0. && dmin1 > 0. && z__[((*n0 - 1) << (2)) - *pp] - < tol * (*sigma + dn1) && abs(dn) < tol * *sigma) { - -/* Convergence hidden by negative DN. */ - - z__[((*n0 - 1) << (2)) - *pp + 2] = 0.; - *dmin__ = 0.; - goto L100; - } else if (*dmin__ < 0.) { - -/* TAU too big. Select new TAU and try again. */ - - ++(*nfail); - if (ttype < -22) { - -/* Failed twice. Play it safe. */ - - tau = 0.; - } else if (dmin1 > 0.) { - -/* Late failure. Gives excellent shift. */ - - tau = (tau + *dmin__) * (1. - eps * 2.); - ttype += -11; - } else { - -/* Early failure. Divide by 4. */ - - tau *= .25; - ttype += -12; - } - goto L80; - } else if (*dmin__ != *dmin__) { - -/* NaN. */ - - tau = 0.; - goto L80; - } else { - -/* Possible underflow. Play it safe. */ - - goto L90; - } - } - -/* Risk of underflow. 
*/ - -L90: - dlasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2); - *ndiv += *n0 - *i0 + 2; - ++(*iter); - tau = 0.; - -L100: - if (tau < *sigma) { - *desig += tau; - t = *sigma + *desig; - *desig -= t - *sigma; - } else { - t = *sigma + tau; - *desig = *sigma - (t - tau) + *desig; - } - *sigma = t; - - return 0; - -/* End of DLASQ3 */ - -} /* dlasq3_ */ - -/* Subroutine */ int dlasq4_(integer *i0, integer *n0, doublereal *z__, - integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, - doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, - doublereal *tau, integer *ttype) -{ - /* Initialized data */ - - static doublereal g = 0.; - - /* System generated locals */ - integer i__1; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal s, a2, b1, b2; - static integer i4, nn, np; - static doublereal gam, gap1, gap2; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASQ4 computes an approximation TAU to the smallest eigenvalue - using values of d from the previous transform. - - I0 (input) INTEGER - First index. - - N0 (input) INTEGER - Last index. - - Z (input) DOUBLE PRECISION array, dimension ( 4*N ) - Z holds the qd array. - - PP (input) INTEGER - PP=0 for ping, PP=1 for pong. - - NOIN (input) INTEGER - The value of N0 at start of EIGTEST. - - DMIN (input) DOUBLE PRECISION - Minimum value of d. - - DMIN1 (input) DOUBLE PRECISION - Minimum value of d, excluding D( N0 ). - - DMIN2 (input) DOUBLE PRECISION - Minimum value of d, excluding D( N0 ) and D( N0-1 ). - - DN (input) DOUBLE PRECISION - d(N) - - DN1 (input) DOUBLE PRECISION - d(N-1) - - DN2 (input) DOUBLE PRECISION - d(N-2) - - TAU (output) DOUBLE PRECISION - This is the shift. - - TTYPE (output) INTEGER - Shift type. 
- - Further Details - =============== - CNST1 = 9/16 - - ===================================================================== -*/ - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - -/* - A negative DMIN forces the shift to take that absolute value - TTYPE records the type of shift. -*/ - - if (*dmin__ <= 0.) { - *tau = -(*dmin__); - *ttype = -1; - return 0; - } - - nn = ((*n0) << (2)) + *pp; - if (*n0in == *n0) { - -/* No eigenvalues deflated. */ - - if ((*dmin__ == *dn) || (*dmin__ == *dn1)) { - - b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); - b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); - a2 = z__[nn - 7] + z__[nn - 5]; - -/* Cases 2 and 3. */ - - if (*dmin__ == *dn && *dmin1 == *dn1) { - gap2 = *dmin2 - a2 - *dmin2 * .25; - if (gap2 > 0. && gap2 > b2) { - gap1 = a2 - *dn - b2 / gap2 * b2; - } else { - gap1 = a2 - *dn - (b1 + b2); - } - if (gap1 > 0. && gap1 > b1) { -/* Computing MAX */ - d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5; - s = max(d__1,d__2); - *ttype = -2; - } else { - s = 0.; - if (*dn > b1) { - s = *dn - b1; - } - if (a2 > b1 + b2) { -/* Computing MIN */ - d__1 = s, d__2 = a2 - (b1 + b2); - s = min(d__1,d__2); - } -/* Computing MAX */ - d__1 = s, d__2 = *dmin__ * .333; - s = max(d__1,d__2); - *ttype = -3; - } - } else { - -/* Case 4. */ - - *ttype = -4; - s = *dmin__ * .25; - if (*dmin__ == *dn) { - gam = *dn; - a2 = 0.; - if (z__[nn - 5] > z__[nn - 7]) { - return 0; - } - b2 = z__[nn - 5] / z__[nn - 7]; - np = nn - 9; - } else { - np = nn - ((*pp) << (1)); - b2 = z__[np - 2]; - gam = *dn1; - if (z__[np - 4] > z__[np - 2]) { - return 0; - } - a2 = z__[np - 4] / z__[np - 2]; - if (z__[nn - 9] > z__[nn - 11]) { - return 0; - } - b2 = z__[nn - 9] / z__[nn - 11]; - np = nn - 13; - } - -/* Approximate contribution to norm squared from I < NN-1. */ - - a2 += b2; - i__1 = ((*i0) << (2)) - 1 + *pp; - for (i4 = np; i4 >= i__1; i4 += -4) { - if (b2 == 0.) 
{ - goto L20; - } - b1 = b2; - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b2 *= z__[i4] / z__[i4 - 2]; - a2 += b2; - if ((max(b2,b1) * 100. < a2) || (.563 < a2)) { - goto L20; - } -/* L10: */ - } -L20: - a2 *= 1.05; - -/* Rayleigh quotient residual bound. */ - - if (a2 < .563) { - s = gam * (1. - sqrt(a2)) / (a2 + 1.); - } - } - } else if (*dmin__ == *dn2) { - -/* Case 5. */ - - *ttype = -5; - s = *dmin__ * .25; - -/* Compute contribution to norm squared from I > NN-2. */ - - np = nn - ((*pp) << (1)); - b1 = z__[np - 2]; - b2 = z__[np - 6]; - gam = *dn2; - if ((z__[np - 8] > b2) || (z__[np - 4] > b1)) { - return 0; - } - a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.); - -/* Approximate contribution to norm squared from I < NN-2. */ - - if (*n0 - *i0 > 2) { - b2 = z__[nn - 13] / z__[nn - 15]; - a2 += b2; - i__1 = ((*i0) << (2)) - 1 + *pp; - for (i4 = nn - 17; i4 >= i__1; i4 += -4) { - if (b2 == 0.) { - goto L40; - } - b1 = b2; - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b2 *= z__[i4] / z__[i4 - 2]; - a2 += b2; - if ((max(b2,b1) * 100. < a2) || (.563 < a2)) { - goto L40; - } -/* L30: */ - } -L40: - a2 *= 1.05; - } - - if (a2 < .563) { - s = gam * (1. - sqrt(a2)) / (a2 + 1.); - } - } else { - -/* Case 6, no information to guide us. */ - - if (*ttype == -6) { - g += (1. - g) * .333; - } else if (*ttype == -18) { - g = .083250000000000005; - } else { - g = .25; - } - s = g * *dmin__; - *ttype = -6; - } - - } else if (*n0in == *n0 + 1) { - -/* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ - - if (*dmin1 == *dn1 && *dmin2 == *dn2) { - -/* Cases 7 and 8. */ - - *ttype = -7; - s = *dmin1 * .333; - if (z__[nn - 5] > z__[nn - 7]) { - return 0; - } - b1 = z__[nn - 5] / z__[nn - 7]; - b2 = b1; - if (b2 == 0.) { - goto L60; - } - i__1 = ((*i0) << (2)) - 1 + *pp; - for (i4 = ((*n0) << (2)) - 9 + *pp; i4 >= i__1; i4 += -4) { - a2 = b1; - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b1 *= z__[i4] / z__[i4 - 2]; - b2 += b1; - if (max(b1,a2) * 100. 
< b2) { - goto L60; - } -/* L50: */ - } -L60: - b2 = sqrt(b2 * 1.05); -/* Computing 2nd power */ - d__1 = b2; - a2 = *dmin1 / (d__1 * d__1 + 1.); - gap2 = *dmin2 * .5 - a2; - if (gap2 > 0. && gap2 > b2 * a2) { -/* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); - s = max(d__1,d__2); - } else { -/* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - b2 * 1.01); - s = max(d__1,d__2); - *ttype = -8; - } - } else { - -/* Case 9. */ - - s = *dmin1 * .25; - if (*dmin1 == *dn1) { - s = *dmin1 * .5; - } - *ttype = -9; - } - - } else if (*n0in == *n0 + 2) { - -/* - Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. - - Cases 10 and 11. -*/ - - if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) { - *ttype = -10; - s = *dmin2 * .333; - if (z__[nn - 5] > z__[nn - 7]) { - return 0; - } - b1 = z__[nn - 5] / z__[nn - 7]; - b2 = b1; - if (b2 == 0.) { - goto L80; - } - i__1 = ((*i0) << (2)) - 1 + *pp; - for (i4 = ((*n0) << (2)) - 9 + *pp; i4 >= i__1; i4 += -4) { - if (z__[i4] > z__[i4 - 2]) { - return 0; - } - b1 *= z__[i4] / z__[i4 - 2]; - b2 += b1; - if (b1 * 100. < b2) { - goto L80; - } -/* L70: */ - } -L80: - b2 = sqrt(b2 * 1.05); -/* Computing 2nd power */ - d__1 = b2; - a2 = *dmin2 / (d__1 * d__1 + 1.); - gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ - nn - 9]) - a2; - if (gap2 > 0. && gap2 > b2 * a2) { -/* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); - s = max(d__1,d__2); - } else { -/* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - b2 * 1.01); - s = max(d__1,d__2); - } - } else { - s = *dmin2 * .25; - *ttype = -11; - } - } else if (*n0in > *n0 + 2) { - -/* Case 12, more than two eigenvalues deflated. No information. 
*/ - - s = 0.; - *ttype = -12; - } - - *tau = s; - return 0; - -/* End of DLASQ4 */ - -} /* dlasq4_ */ - -/* Subroutine */ int dlasq5_(integer *i0, integer *n0, doublereal *z__, - integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1, - doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2, - logical *ieee) -{ - /* System generated locals */ - integer i__1; - doublereal d__1, d__2; - - /* Local variables */ - static doublereal d__; - static integer j4, j4p2; - static doublereal emin, temp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - May 17, 2000 - - - Purpose - ======= - - DLASQ5 computes one dqds transform in ping-pong form, one - version for IEEE machines another for non IEEE machines. - - Arguments - ========= - - I0 (input) INTEGER - First index. - - N0 (input) INTEGER - Last index. - - Z (input) DOUBLE PRECISION array, dimension ( 4*N ) - Z holds the qd array. EMIN is stored in Z(4*N0) to avoid - an extra argument. - - PP (input) INTEGER - PP=0 for ping, PP=1 for pong. - - TAU (input) DOUBLE PRECISION - This is the shift. - - DMIN (output) DOUBLE PRECISION - Minimum value of d. - - DMIN1 (output) DOUBLE PRECISION - Minimum value of d, excluding D( N0 ). - - DMIN2 (output) DOUBLE PRECISION - Minimum value of d, excluding D( N0 ) and D( N0-1 ). - - DN (output) DOUBLE PRECISION - d(N0), the last value of d. - - DNM1 (output) DOUBLE PRECISION - d(N0-1). - - DNM2 (output) DOUBLE PRECISION - d(N0-2). - - IEEE (input) LOGICAL - Flag for IEEE or non IEEE arithmetic. 
- - ===================================================================== -*/ - - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - if (*n0 - *i0 - 1 <= 0) { - return 0; - } - - j4 = ((*i0) << (2)) + *pp - 3; - emin = z__[j4 + 4]; - d__ = z__[j4] - *tau; - *dmin__ = d__; - *dmin1 = -z__[j4]; - - if (*ieee) { - -/* Code for IEEE arithmetic. */ - - if (*pp == 0) { - i__1 = (*n0 - 3) << (2); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { - z__[j4 - 2] = d__ + z__[j4 - 1]; - temp = z__[j4 + 1] / z__[j4 - 2]; - d__ = d__ * temp - *tau; - *dmin__ = min(*dmin__,d__); - z__[j4] = z__[j4 - 1] * temp; -/* Computing MIN */ - d__1 = z__[j4]; - emin = min(d__1,emin); -/* L10: */ - } - } else { - i__1 = (*n0 - 3) << (2); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { - z__[j4 - 3] = d__ + z__[j4]; - temp = z__[j4 + 2] / z__[j4 - 3]; - d__ = d__ * temp - *tau; - *dmin__ = min(*dmin__,d__); - z__[j4 - 1] = z__[j4] * temp; -/* Computing MIN */ - d__1 = z__[j4 - 1]; - emin = min(d__1,emin); -/* L20: */ - } - } - -/* Unroll last two steps. */ - - *dnm2 = d__; - *dmin2 = *dmin__; - j4 = ((*n0 - 2) << (2)) - *pp; - j4p2 = j4 + ((*pp) << (1)) - 1; - z__[j4 - 2] = *dnm2 + z__[j4p2]; - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; - *dmin__ = min(*dmin__,*dnm1); - - *dmin1 = *dmin__; - j4 += 4; - j4p2 = j4 + ((*pp) << (1)) - 1; - z__[j4 - 2] = *dnm1 + z__[j4p2]; - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; - *dmin__ = min(*dmin__,*dn); - - } else { - -/* Code for non IEEE arithmetic. */ - - if (*pp == 0) { - i__1 = (*n0 - 3) << (2); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { - z__[j4 - 2] = d__ + z__[j4 - 1]; - if (d__ < 0.) 
{ - return 0; - } else { - z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); - d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; - } - *dmin__ = min(*dmin__,d__); -/* Computing MIN */ - d__1 = emin, d__2 = z__[j4]; - emin = min(d__1,d__2); -/* L30: */ - } - } else { - i__1 = (*n0 - 3) << (2); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { - z__[j4 - 3] = d__ + z__[j4]; - if (d__ < 0.) { - return 0; - } else { - z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); - d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; - } - *dmin__ = min(*dmin__,d__); -/* Computing MIN */ - d__1 = emin, d__2 = z__[j4 - 1]; - emin = min(d__1,d__2); -/* L40: */ - } - } - -/* Unroll last two steps. */ - - *dnm2 = d__; - *dmin2 = *dmin__; - j4 = ((*n0 - 2) << (2)) - *pp; - j4p2 = j4 + ((*pp) << (1)) - 1; - z__[j4 - 2] = *dnm2 + z__[j4p2]; - if (*dnm2 < 0.) { - return 0; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; - } - *dmin__ = min(*dmin__,*dnm1); - - *dmin1 = *dmin__; - j4 += 4; - j4p2 = j4 + ((*pp) << (1)) - 1; - z__[j4 - 2] = *dnm1 + z__[j4p2]; - if (*dnm1 < 0.) { - return 0; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; - } - *dmin__ = min(*dmin__,*dn); - - } - - z__[j4 + 2] = *dn; - z__[((*n0) << (2)) - *pp] = emin; - return 0; - -/* End of DLASQ5 */ - -} /* dlasq5_ */ - -/* Subroutine */ int dlasq6_(integer *i0, integer *n0, doublereal *z__, - integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, - doublereal *dn, doublereal *dnm1, doublereal *dnm2) -{ - /* System generated locals */ - integer i__1; - doublereal d__1, d__2; - - /* Local variables */ - static doublereal d__; - static integer j4, j4p2; - static doublereal emin, temp; - - static doublereal safmin; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - DLASQ6 computes one dqd (shift equal to zero) transform in - ping-pong form, with protection against underflow and overflow. - - Arguments - ========= - - I0 (input) INTEGER - First index. - - N0 (input) INTEGER - Last index. - - Z (input) DOUBLE PRECISION array, dimension ( 4*N ) - Z holds the qd array. EMIN is stored in Z(4*N0) to avoid - an extra argument. - - PP (input) INTEGER - PP=0 for ping, PP=1 for pong. - - DMIN (output) DOUBLE PRECISION - Minimum value of d. - - DMIN1 (output) DOUBLE PRECISION - Minimum value of d, excluding D( N0 ). - - DMIN2 (output) DOUBLE PRECISION - Minimum value of d, excluding D( N0 ) and D( N0-1 ). - - DN (output) DOUBLE PRECISION - d(N0), the last value of d. - - DNM1 (output) DOUBLE PRECISION - d(N0-1). - - DNM2 (output) DOUBLE PRECISION - d(N0-2). - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --z__; - - /* Function Body */ - if (*n0 - *i0 - 1 <= 0) { - return 0; - } - - safmin = SAFEMINIMUM; - j4 = ((*i0) << (2)) + *pp - 3; - emin = z__[j4 + 4]; - d__ = z__[j4]; - *dmin__ = d__; - - if (*pp == 0) { - i__1 = (*n0 - 3) << (2); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { - z__[j4 - 2] = d__ + z__[j4 - 1]; - if (z__[j4 - 2] == 0.) 
{ - z__[j4] = 0.; - d__ = z__[j4 + 1]; - *dmin__ = d__; - emin = 0.; - } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 - - 2] < z__[j4 + 1]) { - temp = z__[j4 + 1] / z__[j4 - 2]; - z__[j4] = z__[j4 - 1] * temp; - d__ *= temp; - } else { - z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); - d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); - } - *dmin__ = min(*dmin__,d__); -/* Computing MIN */ - d__1 = emin, d__2 = z__[j4]; - emin = min(d__1,d__2); -/* L10: */ - } - } else { - i__1 = (*n0 - 3) << (2); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { - z__[j4 - 3] = d__ + z__[j4]; - if (z__[j4 - 3] == 0.) { - z__[j4 - 1] = 0.; - d__ = z__[j4 + 2]; - *dmin__ = d__; - emin = 0.; - } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 - - 3] < z__[j4 + 2]) { - temp = z__[j4 + 2] / z__[j4 - 3]; - z__[j4 - 1] = z__[j4] * temp; - d__ *= temp; - } else { - z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); - d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); - } - *dmin__ = min(*dmin__,d__); -/* Computing MIN */ - d__1 = emin, d__2 = z__[j4 - 1]; - emin = min(d__1,d__2); -/* L20: */ - } - } - -/* Unroll last two steps. */ - - *dnm2 = d__; - *dmin2 = *dmin__; - j4 = ((*n0 - 2) << (2)) - *pp; - j4p2 = j4 + ((*pp) << (1)) - 1; - z__[j4 - 2] = *dnm2 + z__[j4p2]; - if (z__[j4 - 2] == 0.) { - z__[j4] = 0.; - *dnm1 = z__[j4p2 + 2]; - *dmin__ = *dnm1; - emin = 0.; - } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < - z__[j4p2 + 2]) { - temp = z__[j4p2 + 2] / z__[j4 - 2]; - z__[j4] = z__[j4p2] * temp; - *dnm1 = *dnm2 * temp; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); - } - *dmin__ = min(*dmin__,*dnm1); - - *dmin1 = *dmin__; - j4 += 4; - j4p2 = j4 + ((*pp) << (1)) - 1; - z__[j4 - 2] = *dnm1 + z__[j4p2]; - if (z__[j4 - 2] == 0.) 
{ - z__[j4] = 0.; - *dn = z__[j4p2 + 2]; - *dmin__ = *dn; - emin = 0.; - } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < - z__[j4p2 + 2]) { - temp = z__[j4p2 + 2] / z__[j4 - 2]; - z__[j4] = z__[j4p2] * temp; - *dn = *dnm1 * temp; - } else { - z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); - *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); - } - *dmin__ = min(*dmin__,*dn); - - z__[j4 + 2] = *dn; - z__[((*n0) << (2)) - *pp] = emin; - return 0; - -/* End of DLASQ6 */ - -} /* dlasq6_ */ - -/* Subroutine */ int dlasr_(char *side, char *pivot, char *direct, integer *m, - integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * - lda) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, info; - static doublereal temp; - extern logical lsame_(char *, char *); - static doublereal ctemp, stemp; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLASR performs the transformation - - A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) - - A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) - - where A is an m by n real matrix and P is an orthogonal matrix, - consisting of a sequence of plane rotations determined by the - parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' - and z = n when SIDE = 'R' or 'r' ): - - When DIRECT = 'F' or 'f' ( Forward sequence ) then - - P = P( z - 1 )*...*P( 2 )*P( 1 ), - - and when DIRECT = 'B' or 'b' ( Backward sequence ) then - - P = P( 1 )*P( 2 )*...*P( z - 1 ), - - where P( k ) is a plane rotation matrix for the following planes: - - when PIVOT = 'V' or 'v' ( Variable pivot ), - the plane ( k, k + 1 ) - - when PIVOT = 'T' or 't' ( Top pivot ), - the plane ( 1, k + 1 ) - - when PIVOT = 'B' or 'b' ( Bottom pivot ), - the plane ( k, z ) - - c( k ) and s( k ) must contain the cosine and sine that define the - matrix P( k ). The two by two plane rotation part of the matrix - P( k ), R( k ), is assumed to be of the form - - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) - - This version vectorises across rows of the array A when SIDE = 'L'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - Specifies whether the plane rotation matrix P is applied to - A on the left or the right. - = 'L': Left, compute A := P*A - = 'R': Right, compute A:= A*P' - - DIRECT (input) CHARACTER*1 - Specifies whether P is a forward or backward sequence of - plane rotations. - = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) - = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) - - PIVOT (input) CHARACTER*1 - Specifies the plane for which P(k) is a plane rotation - matrix. 
- = 'V': Variable pivot, the plane (k,k+1) - = 'T': Top pivot, the plane (1,k+1) - = 'B': Bottom pivot, the plane (k,z) - - M (input) INTEGER - The number of rows of the matrix A. If m <= 1, an immediate - return is effected. - - N (input) INTEGER - The number of columns of the matrix A. If n <= 1, an - immediate return is effected. - - C, S (input) DOUBLE PRECISION arrays, dimension - (M-1) if SIDE = 'L' - (N-1) if SIDE = 'R' - c(k) and s(k) contain the cosine and sine that define the - matrix P(k). The two by two plane rotation part of the - matrix P(k), R(k), is assumed to be of the form - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - The m by n matrix A. On exit, A is overwritten by P*A if - SIDE = 'R' or by A*P' if SIDE = 'L'. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - --c__; - --s; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - info = 0; - if (! ((lsame_(side, "L")) || (lsame_(side, "R")))) { - info = 1; - } else if (! (((lsame_(pivot, "V")) || (lsame_( - pivot, "T"))) || (lsame_(pivot, "B")))) { - info = 2; - } else if (! ((lsame_(direct, "F")) || (lsame_( - direct, "B")))) { - info = 3; - } else if (*m < 0) { - info = 4; - } else if (*n < 0) { - info = 5; - } else if (*lda < max(1,*m)) { - info = 9; - } - if (info != 0) { - xerbla_("DLASR ", &info); - return 0; - } - -/* Quick return if possible */ - - if ((*m == 0) || (*n == 0)) { - return 0; - } - if (lsame_(side, "L")) { - -/* Form P * A */ - - if (lsame_(pivot, "V")) { - if (lsame_(direct, "F")) { - i__1 = *m - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.) 
|| (stemp != 0.)) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[j + 1 + i__ * a_dim1]; - a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * - a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j - + i__ * a_dim1]; -/* L10: */ - } - } -/* L20: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.) || (stemp != 0.)) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[j + 1 + i__ * a_dim1]; - a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * - a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j - + i__ * a_dim1]; -/* L30: */ - } - } -/* L40: */ - } - } - } else if (lsame_(pivot, "T")) { - if (lsame_(direct, "F")) { - i__1 = *m; - for (j = 2; j <= i__1; ++j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if ((ctemp != 1.) || (stemp != 0.)) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ - i__ * a_dim1 + 1]; - a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ - i__ * a_dim1 + 1]; -/* L50: */ - } - } -/* L60: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m; j >= 2; --j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if ((ctemp != 1.) || (stemp != 0.)) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ - i__ * a_dim1 + 1]; - a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ - i__ * a_dim1 + 1]; -/* L70: */ - } - } -/* L80: */ - } - } - } else if (lsame_(pivot, "B")) { - if (lsame_(direct, "F")) { - i__1 = *m - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.) 
|| (stemp != 0.)) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] - + ctemp * temp; - a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * - a_dim1] - stemp * temp; -/* L90: */ - } - } -/* L100: */ - } - } else if (lsame_(direct, "B")) { - for (j = *m - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.) || (stemp != 0.)) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[j + i__ * a_dim1]; - a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] - + ctemp * temp; - a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * - a_dim1] - stemp * temp; -/* L110: */ - } - } -/* L120: */ - } - } - } - } else if (lsame_(side, "R")) { - -/* Form A * P' */ - - if (lsame_(pivot, "V")) { - if (lsame_(direct, "F")) { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.) || (stemp != 0.)) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[i__ + (j + 1) * a_dim1]; - a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * - a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ - i__ + j * a_dim1]; -/* L130: */ - } - } -/* L140: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.) || (stemp != 0.)) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[i__ + (j + 1) * a_dim1]; - a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * - a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ - i__ + j * a_dim1]; -/* L150: */ - } - } -/* L160: */ - } - } - } else if (lsame_(pivot, "T")) { - if (lsame_(direct, "F")) { - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if ((ctemp != 1.) 
|| (stemp != 0.)) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ - i__ + a_dim1]; - a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + - a_dim1]; -/* L170: */ - } - } -/* L180: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n; j >= 2; --j) { - ctemp = c__[j - 1]; - stemp = s[j - 1]; - if ((ctemp != 1.) || (stemp != 0.)) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ - i__ + a_dim1]; - a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + - a_dim1]; -/* L190: */ - } - } -/* L200: */ - } - } - } else if (lsame_(pivot, "B")) { - if (lsame_(direct, "F")) { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.) || (stemp != 0.)) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp = a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] - + ctemp * temp; - a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * - a_dim1] - stemp * temp; -/* L210: */ - } - } -/* L220: */ - } - } else if (lsame_(direct, "B")) { - for (j = *n - 1; j >= 1; --j) { - ctemp = c__[j]; - stemp = s[j]; - if ((ctemp != 1.) 
|| (stemp != 0.)) { - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - temp = a[i__ + j * a_dim1]; - a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] - + ctemp * temp; - a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * - a_dim1] - stemp * temp; -/* L230: */ - } - } -/* L240: */ - } - } - } - } - - return 0; - -/* End of DLASR */ - -} /* dlasr_ */ - -/* Subroutine */ int dlasrt_(char *id, integer *n, doublereal *d__, integer * - info) -{ - /* System generated locals */ - integer i__1, i__2; - - /* Local variables */ - static integer i__, j; - static doublereal d1, d2, d3; - static integer dir; - static doublereal tmp; - static integer endd; - extern logical lsame_(char *, char *); - static integer stack[64] /* was [2][32] */; - static doublereal dmnmx; - static integer start; - extern /* Subroutine */ int xerbla_(char *, integer *); - static integer stkpnt; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - Sort the numbers in D in increasing order (if ID = 'I') or - in decreasing order (if ID = 'D' ). - - Use Quick Sort, reverting to Insertion sort on arrays of - size <= 20. Dimension of STACK limits N to about 2**32. - - Arguments - ========= - - ID (input) CHARACTER*1 - = 'I': sort D in increasing order; - = 'D': sort D in decreasing order. - - N (input) INTEGER - The length of the array D. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the array to be sorted. - On exit, D has been sorted into increasing order - (D(1) <= ... <= D(N) ) or into decreasing order - (D(1) >= ... >= D(N) ), depending on ID. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input paramters. 
-*/ - - /* Parameter adjustments */ - --d__; - - /* Function Body */ - *info = 0; - dir = -1; - if (lsame_(id, "D")) { - dir = 0; - } else if (lsame_(id, "I")) { - dir = 1; - } - if (dir == -1) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLASRT", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 1) { - return 0; - } - - stkpnt = 1; - stack[0] = 1; - stack[1] = *n; -L10: - start = stack[((stkpnt) << (1)) - 2]; - endd = stack[((stkpnt) << (1)) - 1]; - --stkpnt; - if (endd - start <= 20 && endd - start > 0) { - -/* Do Insertion sort on D( START:ENDD ) */ - - if (dir == 0) { - -/* Sort into decreasing order */ - - i__1 = endd; - for (i__ = start + 1; i__ <= i__1; ++i__) { - i__2 = start + 1; - for (j = i__; j >= i__2; --j) { - if (d__[j] > d__[j - 1]) { - dmnmx = d__[j]; - d__[j] = d__[j - 1]; - d__[j - 1] = dmnmx; - } else { - goto L30; - } -/* L20: */ - } -L30: - ; - } - - } else { - -/* Sort into increasing order */ - - i__1 = endd; - for (i__ = start + 1; i__ <= i__1; ++i__) { - i__2 = start + 1; - for (j = i__; j >= i__2; --j) { - if (d__[j] < d__[j - 1]) { - dmnmx = d__[j]; - d__[j] = d__[j - 1]; - d__[j - 1] = dmnmx; - } else { - goto L50; - } -/* L40: */ - } -L50: - ; - } - - } - - } else if (endd - start > 20) { - -/* - Partition D( START:ENDD ) and stack parts, largest one first - - Choose partition entry as median of 3 -*/ - - d1 = d__[start]; - d2 = d__[endd]; - i__ = (start + endd) / 2; - d3 = d__[i__]; - if (d1 < d2) { - if (d3 < d1) { - dmnmx = d1; - } else if (d3 < d2) { - dmnmx = d3; - } else { - dmnmx = d2; - } - } else { - if (d3 < d2) { - dmnmx = d2; - } else if (d3 < d1) { - dmnmx = d3; - } else { - dmnmx = d1; - } - } - - if (dir == 0) { - -/* Sort into decreasing order */ - - i__ = start - 1; - j = endd + 1; -L60: -L70: - --j; - if (d__[j] < dmnmx) { - goto L70; - } -L80: - ++i__; - if (d__[i__] > dmnmx) { - goto L80; - } - if (i__ < j) { - tmp = d__[i__]; - 
d__[i__] = d__[j]; - d__[j] = tmp; - goto L60; - } - if (j - start > endd - j - 1) { - ++stkpnt; - stack[((stkpnt) << (1)) - 2] = start; - stack[((stkpnt) << (1)) - 1] = j; - ++stkpnt; - stack[((stkpnt) << (1)) - 2] = j + 1; - stack[((stkpnt) << (1)) - 1] = endd; - } else { - ++stkpnt; - stack[((stkpnt) << (1)) - 2] = j + 1; - stack[((stkpnt) << (1)) - 1] = endd; - ++stkpnt; - stack[((stkpnt) << (1)) - 2] = start; - stack[((stkpnt) << (1)) - 1] = j; - } - } else { - -/* Sort into increasing order */ - - i__ = start - 1; - j = endd + 1; -L90: -L100: - --j; - if (d__[j] > dmnmx) { - goto L100; - } -L110: - ++i__; - if (d__[i__] < dmnmx) { - goto L110; - } - if (i__ < j) { - tmp = d__[i__]; - d__[i__] = d__[j]; - d__[j] = tmp; - goto L90; - } - if (j - start > endd - j - 1) { - ++stkpnt; - stack[((stkpnt) << (1)) - 2] = start; - stack[((stkpnt) << (1)) - 1] = j; - ++stkpnt; - stack[((stkpnt) << (1)) - 2] = j + 1; - stack[((stkpnt) << (1)) - 1] = endd; - } else { - ++stkpnt; - stack[((stkpnt) << (1)) - 2] = j + 1; - stack[((stkpnt) << (1)) - 1] = endd; - ++stkpnt; - stack[((stkpnt) << (1)) - 2] = start; - stack[((stkpnt) << (1)) - 1] = j; - } - } - } - if (stkpnt > 0) { - goto L10; - } - return 0; - -/* End of DLASRT */ - -} /* dlasrt_ */ - -/* Subroutine */ int dlassq_(integer *n, doublereal *x, integer *incx, - doublereal *scale, doublereal *sumsq) -{ - /* System generated locals */ - integer i__1, i__2; - doublereal d__1; - - /* Local variables */ - static integer ix; - static doublereal absxi; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASSQ returns the values scl and smsq such that - - ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, - - where x( i ) = X( 1 + ( i - 1 )*INCX ). 
The value of sumsq is - assumed to be non-negative and scl returns the value - - scl = max( scale, abs( x( i ) ) ). - - scale and sumsq must be supplied in SCALE and SUMSQ and - scl and smsq are overwritten on SCALE and SUMSQ respectively. - - The routine makes only one pass through the vector x. - - Arguments - ========= - - N (input) INTEGER - The number of elements to be used from the vector X. - - X (input) DOUBLE PRECISION array, dimension (N) - The vector for which a scaled sum of squares is computed. - x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. - - INCX (input) INTEGER - The increment between successive values of the vector X. - INCX > 0. - - SCALE (input/output) DOUBLE PRECISION - On entry, the value scale in the equation above. - On exit, SCALE is overwritten with scl , the scaling factor - for the sum of squares. - - SUMSQ (input/output) DOUBLE PRECISION - On entry, the value sumsq in the equation above. - On exit, SUMSQ is overwritten with smsq , the basic sum of - squares from which scl has been factored out. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --x; - - /* Function Body */ - if (*n > 0) { - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - if (x[ix] != 0.) 
{ - absxi = (d__1 = x[ix], abs(d__1)); - if (*scale < absxi) { -/* Computing 2nd power */ - d__1 = *scale / absxi; - *sumsq = *sumsq * (d__1 * d__1) + 1; - *scale = absxi; - } else { -/* Computing 2nd power */ - d__1 = absxi / *scale; - *sumsq += d__1 * d__1; - } - } -/* L10: */ - } - } - return 0; - -/* End of DLASSQ */ - -} /* dlassq_ */ - -/* Subroutine */ int dlasv2_(doublereal *f, doublereal *g, doublereal *h__, - doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * - csr, doublereal *snl, doublereal *csl) -{ - /* System generated locals */ - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static doublereal a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, - clt, crt, slt, srt; - static integer pmax; - static doublereal temp; - static logical swap; - static doublereal tsign; - - static logical gasmal; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLASV2 computes the singular value decomposition of a 2-by-2 - triangular matrix - [ F G ] - [ 0 H ]. - On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the - smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and - right singular vectors for abs(SSMAX), giving the decomposition - - [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] - [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. - - Arguments - ========= - - F (input) DOUBLE PRECISION - The (1,1) element of the 2-by-2 matrix. - - G (input) DOUBLE PRECISION - The (1,2) element of the 2-by-2 matrix. - - H (input) DOUBLE PRECISION - The (2,2) element of the 2-by-2 matrix. - - SSMIN (output) DOUBLE PRECISION - abs(SSMIN) is the smaller singular value. - - SSMAX (output) DOUBLE PRECISION - abs(SSMAX) is the larger singular value. 
- - SNL (output) DOUBLE PRECISION - CSL (output) DOUBLE PRECISION - The vector (CSL, SNL) is a unit left singular vector for the - singular value abs(SSMAX). - - SNR (output) DOUBLE PRECISION - CSR (output) DOUBLE PRECISION - The vector (CSR, SNR) is a unit right singular vector for the - singular value abs(SSMAX). - - Further Details - =============== - - Any input parameter may be aliased with any output parameter. - - Barring over/underflow and assuming a guard digit in subtraction, all - output quantities are correct to within a few units in the last - place (ulps). - - In IEEE arithmetic, the code works correctly if one matrix element is - infinite. - - Overflow will not occur unless the largest singular value itself - overflows or is within a few ulps of overflow. (On machines with - partial overflow, like the Cray, overflow may occur if the largest - singular value is within a factor of 2 of overflow.) - - Underflow is harmless if underflow is gradual. Otherwise, results - may correspond to a matrix modified by perturbations of size near - the underflow threshold. - - ===================================================================== -*/ - - - ft = *f; - fa = abs(ft); - ht = *h__; - ha = abs(*h__); - -/* - PMAX points to the maximum absolute element of matrix - PMAX = 1 if F largest in absolute values - PMAX = 2 if G largest in absolute values - PMAX = 3 if H largest in absolute values -*/ - - pmax = 1; - swap = ha > fa; - if (swap) { - pmax = 3; - temp = ft; - ft = ht; - ht = temp; - temp = fa; - fa = ha; - ha = temp; - -/* Now FA .ge. HA */ - - } - gt = *g; - ga = abs(gt); - if (ga == 0.) { - -/* Diagonal matrix */ - - *ssmin = ha; - *ssmax = fa; - clt = 1.; - crt = 1.; - slt = 0.; - srt = 0.; - } else { - gasmal = TRUE_; - if (ga > fa) { - pmax = 2; - if (fa / ga < EPSILON) { - -/* Case of very large GA */ - - gasmal = FALSE_; - *ssmax = ga; - if (ha > 1.) 
{ - *ssmin = fa / (ga / ha); - } else { - *ssmin = fa / ga * ha; - } - clt = 1.; - slt = ht / gt; - srt = 1.; - crt = ft / gt; - } - } - if (gasmal) { - -/* Normal case */ - - d__ = fa - ha; - if (d__ == fa) { - -/* Copes with infinite F or H */ - - l = 1.; - } else { - l = d__ / fa; - } - -/* Note that 0 .le. L .le. 1 */ - - m = gt / ft; - -/* Note that abs(M) .le. 1/macheps */ - - t = 2. - l; - -/* Note that T .ge. 1 */ - - mm = m * m; - tt = t * t; - s = sqrt(tt + mm); - -/* Note that 1 .le. S .le. 1 + 1/macheps */ - - if (l == 0.) { - r__ = abs(m); - } else { - r__ = sqrt(l * l + mm); - } - -/* Note that 0 .le. R .le. 1 + 1/macheps */ - - a = (s + r__) * .5; - -/* Note that 1 .le. A .le. 1 + abs(M) */ - - *ssmin = ha / a; - *ssmax = fa * a; - if (mm == 0.) { - -/* Note that M is very tiny */ - - if (l == 0.) { - t = d_sign(&c_b5654, &ft) * d_sign(&c_b2865, >); - } else { - t = gt / d_sign(&d__, &ft) + m / t; - } - } else { - t = (m / (s + t) + m / (r__ + l)) * (a + 1.); - } - l = sqrt(t * t + 4.); - crt = 2. 
/ l; - srt = t / l; - clt = (crt + srt * m) / a; - slt = ht / ft * srt / a; - } - } - if (swap) { - *csl = srt; - *snl = crt; - *csr = slt; - *snr = clt; - } else { - *csl = clt; - *snl = slt; - *csr = crt; - *snr = srt; - } - -/* Correct signs of SSMAX and SSMIN */ - - if (pmax == 1) { - tsign = d_sign(&c_b2865, csr) * d_sign(&c_b2865, csl) * d_sign(& - c_b2865, f); - } - if (pmax == 2) { - tsign = d_sign(&c_b2865, snr) * d_sign(&c_b2865, csl) * d_sign(& - c_b2865, g); - } - if (pmax == 3) { - tsign = d_sign(&c_b2865, snr) * d_sign(&c_b2865, snl) * d_sign(& - c_b2865, h__); - } - *ssmax = d_sign(ssmax, &tsign); - d__1 = tsign * d_sign(&c_b2865, f) * d_sign(&c_b2865, h__); - *ssmin = d_sign(ssmin, &d__1); - return 0; - -/* End of DLASV2 */ - -} /* dlasv2_ */ - -/* Subroutine */ int dlaswp_(integer *n, doublereal *a, integer *lda, integer - *k1, integer *k2, integer *ipiv, integer *incx) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; - static doublereal temp; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DLASWP performs a series of row interchanges on the matrix A. - One row interchange is initiated for each of rows K1 through K2 of A. - - Arguments - ========= - - N (input) INTEGER - The number of columns of the matrix A. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the matrix of column dimension N to which the row - interchanges will be applied. - On exit, the permuted matrix. - - LDA (input) INTEGER - The leading dimension of the array A. - - K1 (input) INTEGER - The first element of IPIV for which a row interchange will - be done. - - K2 (input) INTEGER - The last element of IPIV for which a row interchange will - be done. 
- - IPIV (input) INTEGER array, dimension (M*abs(INCX)) - The vector of pivot indices. Only the elements in positions - K1 through K2 of IPIV are accessed. - IPIV(K) = L implies rows K and L are to be interchanged. - - INCX (input) INTEGER - The increment between successive values of IPIV. If IPIV - is negative, the pivots are applied in reverse order. - - Further Details - =============== - - Modified by - R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA - - ===================================================================== - - - Interchange row I with row IPIV(I) for each of rows K1 through K2. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - if (*incx > 0) { - ix0 = *k1; - i1 = *k1; - i2 = *k2; - inc = 1; - } else if (*incx < 0) { - ix0 = (1 - *k2) * *incx + 1; - i1 = *k2; - i2 = *k1; - inc = -1; - } else { - return 0; - } - - n32 = (*n / 32) << (5); - if (n32 != 0) { - i__1 = n32; - for (j = 1; j <= i__1; j += 32) { - ix = ix0; - i__2 = i2; - i__3 = inc; - for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) - { - ip = ipiv[ix]; - if (ip != i__) { - i__4 = j + 31; - for (k = j; k <= i__4; ++k) { - temp = a[i__ + k * a_dim1]; - a[i__ + k * a_dim1] = a[ip + k * a_dim1]; - a[ip + k * a_dim1] = temp; -/* L10: */ - } - } - ix += *incx; -/* L20: */ - } -/* L30: */ - } - } - if (n32 != *n) { - ++n32; - ix = ix0; - i__1 = i2; - i__3 = inc; - for (i__ = i1; i__3 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__3) { - ip = ipiv[ix]; - if (ip != i__) { - i__2 = *n; - for (k = n32; k <= i__2; ++k) { - temp = a[i__ + k * a_dim1]; - a[i__ + k * a_dim1] = a[ip + k * a_dim1]; - a[ip + k * a_dim1] = temp; -/* L40: */ - } - } - ix += *incx; -/* L50: */ - } - } - - return 0; - -/* End of DLASWP */ - -} /* dlaswp_ */ - -/* Subroutine */ int dlatrd_(char *uplo, integer *n, integer *nb, doublereal * - a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, - integer *ldw) -{ - /* System generated locals */ - integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, iw; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - static doublereal alpha; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), daxpy_(integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *), - dsymv_(char *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), dlarfg_(integer *, doublereal *, doublereal *, integer *, - doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLATRD reduces NB rows and columns of a real symmetric matrix A to - symmetric tridiagonal form by an orthogonal similarity - transformation Q' * A * Q, and returns the matrices V and W which are - needed to apply the transformation to the unreduced part of A. 
- - If UPLO = 'U', DLATRD reduces the last NB rows and columns of a - matrix, of which the upper triangle is supplied; - if UPLO = 'L', DLATRD reduces the first NB rows and columns of a - matrix, of which the lower triangle is supplied. - - This is an auxiliary routine called by DSYTRD. - - Arguments - ========= - - UPLO (input) CHARACTER - Specifies whether the upper or lower triangular part of the - symmetric matrix A is stored: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. - - NB (input) INTEGER - The number of rows and columns to be reduced. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the symmetric matrix A. If UPLO = 'U', the leading - n-by-n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n-by-n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit: - if UPLO = 'U', the last NB columns have been reduced to - tridiagonal form, with the diagonal elements overwriting - the diagonal elements of A; the elements above the diagonal - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors; - if UPLO = 'L', the first NB columns have been reduced to - tridiagonal form, with the diagonal elements overwriting - the diagonal elements of A; the elements below the diagonal - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= (1,N). 
- - E (output) DOUBLE PRECISION array, dimension (N-1) - If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal - elements of the last NB columns of the reduced matrix; - if UPLO = 'L', E(1:nb) contains the subdiagonal elements of - the first NB columns of the reduced matrix. - - TAU (output) DOUBLE PRECISION array, dimension (N-1) - The scalar factors of the elementary reflectors, stored in - TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. - See Further Details. - - W (output) DOUBLE PRECISION array, dimension (LDW,NB) - The n-by-nb matrix W required to update the unreduced part - of A. - - LDW (input) INTEGER - The leading dimension of the array W. LDW >= max(1,N). - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n) H(n-1) . . . H(n-nb+1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), - and tau in TAU(i-1). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(nb). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), - and tau in TAU(i). - - The elements of the vectors v together form the n-by-nb matrix V - which is needed, with W, to apply the transformation to the unreduced - part of the matrix, using a symmetric rank-2k update of the form: - A := A - V*W' - W*V'. 
- - The contents of A on exit are illustrated by the following examples - with n = 5 and nb = 2: - - if UPLO = 'U': if UPLO = 'L': - - ( a a a v4 v5 ) ( d ) - ( a a v4 v5 ) ( 1 d ) - ( a 1 v5 ) ( v1 1 a ) - ( d 1 ) ( v1 v2 a a ) - ( d ) ( v1 v2 a a a ) - - where d denotes a diagonal element of the reduced matrix, a denotes - an element of the original matrix that is unchanged, and vi denotes - an element of the vector defining H(i). - - ===================================================================== - - - Quick return if possible -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --e; - --tau; - w_dim1 = *ldw; - w_offset = 1 + w_dim1; - w -= w_offset; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - - if (lsame_(uplo, "U")) { - -/* Reduce last NB columns of upper triangle */ - - i__1 = *n - *nb + 1; - for (i__ = *n; i__ >= i__1; --i__) { - iw = i__ - *n + *nb; - if (i__ < *n) { - -/* Update A(1:i,i) */ - - i__2 = *n - i__; - dgemv_("No transpose", &i__, &i__2, &c_b3001, &a[(i__ + 1) * - a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & - c_b2865, &a[i__ * a_dim1 + 1], &c__1); - i__2 = *n - i__; - dgemv_("No transpose", &i__, &i__2, &c_b3001, &w[(iw + 1) * - w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b2865, &a[i__ * a_dim1 + 1], &c__1); - } - if (i__ > 1) { - -/* - Generate elementary reflector H(i) to annihilate - A(1:i-2,i) -*/ - - i__2 = i__ - 1; - dlarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + - 1], &c__1, &tau[i__ - 1]); - e[i__ - 1] = a[i__ - 1 + i__ * a_dim1]; - a[i__ - 1 + i__ * a_dim1] = 1.; - -/* Compute W(1:i-1,i) */ - - i__2 = i__ - 1; - dsymv_("Upper", &i__2, &c_b2865, &a[a_offset], lda, &a[i__ * - a_dim1 + 1], &c__1, &c_b2879, &w[iw * w_dim1 + 1], & - c__1); - if (i__ < *n) { - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b2865, &w[(iw + 1) * - w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, & - c_b2879, &w[i__ + 1 + iw * w_dim1], 
&c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[(i__ + - 1) * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], - &c__1, &c_b2865, &w[iw * w_dim1 + 1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & - c_b2879, &w[i__ + 1 + iw * w_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &w[(iw + 1) - * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b2865, &w[iw * w_dim1 + 1], &c__1); - } - i__2 = i__ - 1; - dscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); - i__2 = i__ - 1; - alpha = tau[i__ - 1] * -.5 * ddot_(&i__2, &w[iw * w_dim1 + 1], - &c__1, &a[i__ * a_dim1 + 1], &c__1); - i__2 = i__ - 1; - daxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * - w_dim1 + 1], &c__1); - } - -/* L10: */ - } - } else { - -/* Reduce first NB columns of lower triangle */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i:n,i) */ - - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[i__ + a_dim1], - lda, &w[i__ + w_dim1], ldw, &c_b2865, &a[i__ + i__ * - a_dim1], &c__1); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &w[i__ + w_dim1], - ldw, &a[i__ + a_dim1], lda, &c_b2865, &a[i__ + i__ * - a_dim1], &c__1); - if (i__ < *n) { - -/* - Generate elementary reflector H(i) to annihilate - A(i+2:n,i) -*/ - - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + - i__ * a_dim1], &c__1, &tau[i__]); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.; - -/* Compute W(i+1:n,i) */ - - i__2 = *n - i__; - dsymv_("Lower", &i__2, &c_b2865, &a[i__ + 1 + (i__ + 1) * - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b2879, &w[i__ + 1 + i__ * w_dim1], &c__1) - ; - i__2 = *n - i__; - i__3 = i__ - 
1; - dgemv_("Transpose", &i__2, &i__3, &c_b2865, &w[i__ + 1 + - w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b2879, &w[i__ * w_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[i__ + 1 + - a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b2865, & - w[i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[i__ + 1 + - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b2879, &w[i__ * w_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &w[i__ + 1 + - w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b2865, & - w[i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - dscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); - i__2 = *n - i__; - alpha = tau[i__] * -.5 * ddot_(&i__2, &w[i__ + 1 + i__ * - w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); - i__2 = *n - i__; - daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ - i__ + 1 + i__ * w_dim1], &c__1); - } - -/* L20: */ - } - } - - return 0; - -/* End of DLATRD */ - -} /* dlatrd_ */ - -/* Subroutine */ int dlauu2_(char *uplo, integer *n, doublereal *a, integer * - lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__; - static doublereal aii; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLAUU2 computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the unblocked form of the algorithm, calling Level 2 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAUU2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (upper) { - -/* Compute the product U * U'. 
*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - aii = a[i__ + i__ * a_dim1]; - if (i__ < *n) { - i__2 = *n - i__ + 1; - a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1], - lda, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, &c_b2865, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - aii, &a[i__ * a_dim1 + 1], &c__1); - } else { - dscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); - } -/* L10: */ - } - - } else { - -/* Compute the product L' * L. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - aii = a[i__ + i__ * a_dim1]; - if (i__ < *n) { - i__2 = *n - i__ + 1; - a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1], & - c__1, &a[i__ + i__ * a_dim1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[i__ + 1 + - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, - &a[i__ + a_dim1], lda); - } else { - dscal_(&i__, &aii, &a[i__ + a_dim1], lda); - } -/* L20: */ - } - } - - return 0; - -/* End of DLAUU2 */ - -} /* dlauu2_ */ - -/* Subroutine */ int dlauum_(char *uplo, integer *n, doublereal *a, integer * - lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, ib, nb; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, doublereal *, - integer *), dlauu2_(char *, integer *, - doublereal *, integer *, integer *), 
xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DLAUUM computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the blocked form of the algorithm, calling Level 3 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! 
lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAUUM", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "DLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - - if ((nb <= 1) || (nb >= *n)) { - -/* Use unblocked code */ - - dlauu2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute the product U * U'. */ - - i__1 = *n; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - dtrmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, - &c_b2865, &a[i__ + i__ * a_dim1], lda, &a[i__ * - a_dim1 + 1], lda); - dlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - dgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, & - c_b2865, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ - + (i__ + ib) * a_dim1], lda, &c_b2865, &a[i__ * - a_dim1 + 1], lda); - i__3 = *n - i__ - ib + 1; - dsyrk_("Upper", "No transpose", &ib, &i__3, &c_b2865, &a[ - i__ + (i__ + ib) * a_dim1], lda, &c_b2865, &a[i__ - + i__ * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Compute the product L' * L. */ - - i__2 = *n; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - dtrmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, & - c_b2865, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1] - , lda); - dlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - dgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, & - c_b2865, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ - + ib + a_dim1], lda, &c_b2865, &a[i__ + a_dim1], - lda); - i__3 = *n - i__ - ib + 1; - dsyrk_("Lower", "Transpose", &ib, &i__3, &c_b2865, &a[i__ - + ib + i__ * a_dim1], lda, &c_b2865, &a[i__ + i__ - * a_dim1], lda); - } -/* L20: */ - } - } - } - - return 0; - -/* End of DLAUUM */ - -} /* dlauum_ */ - -/* Subroutine */ int dorg2r_(integer *m, integer *n, integer *k, doublereal * - a, integer *lda, doublereal *tau, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal d__1; - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dlarf_(char *, integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DORG2R generates an m by n real matrix Q with orthonormal columns, - which is defined as the first n columns of a product of k elementary - reflectors of order m - - Q = H(1) H(2) . . . H(k) - - as returned by DGEQRF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. M >= N >= 0. 
- - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. N >= K >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the i-th column must contain the vector which - defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by DGEQRF in the first k columns of its array - argument A. - On exit, the m-by-n matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQRF. - - WORK (workspace) DOUBLE PRECISION array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if ((*n < 0) || (*n > *m)) { - *info = -2; - } else if ((*k < 0) || (*k > *n)) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORG2R", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - return 0; - } - -/* Initialise columns k+1:n to columns of the unit matrix */ - - i__1 = *n; - for (j = *k + 1; j <= i__1; ++j) { - i__2 = *m; - for (l = 1; l <= i__2; ++l) { - a[l + j * a_dim1] = 0.; -/* L10: */ - } - a[j + j * a_dim1] = 1.; -/* L20: */ - } - - for (i__ = *k; i__ >= 1; --i__) { - -/* Apply H(i) to A(i:m,i:n) from the left */ - - if (i__ < *n) { - a[i__ + i__ * a_dim1] = 1.; - i__1 = *m - i__ + 1; - i__2 = *n - i__; - dlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - } - if (i__ < *m) { - i__1 = *m - i__; - d__1 = -tau[i__]; - 
dscal_(&i__1, &d__1, &a[i__ + 1 + i__ * a_dim1], &c__1); - } - a[i__ + i__ * a_dim1] = 1. - tau[i__]; - -/* Set A(1:i-1,i) to zero */ - - i__1 = i__ - 1; - for (l = 1; l <= i__1; ++l) { - a[l + i__ * a_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } - return 0; - -/* End of DORG2R */ - -} /* dorg2r_ */ - -/* Subroutine */ int dorgbr_(char *vect, integer *m, integer *n, integer *k, - doublereal *a, integer *lda, doublereal *tau, doublereal *work, - integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, nb, mn; - extern logical lsame_(char *, char *); - static integer iinfo; - static logical wantq; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dorglq_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *), dorgqr_(integer *, integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORGBR generates one of the real orthogonal matrices Q or P**T - determined by DGEBRD when reducing a real matrix A to bidiagonal - form: A = Q * B * P**T. Q and P**T are defined as products of - elementary reflectors H(i) or G(i) respectively. - - If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q - is of order M: - if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n - columns of Q, where m >= n >= k; - if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an - M-by-M matrix. 
- - If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T - is of order N: - if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m - rows of P**T, where n >= m >= k; - if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as - an N-by-N matrix. - - Arguments - ========= - - VECT (input) CHARACTER*1 - Specifies whether the matrix Q or the matrix P**T is - required, as defined in the transformation applied by DGEBRD: - = 'Q': generate Q; - = 'P': generate P**T. - - M (input) INTEGER - The number of rows of the matrix Q or P**T to be returned. - M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q or P**T to be returned. - N >= 0. - If VECT = 'Q', M >= N >= min(M,K); - if VECT = 'P', N >= M >= min(N,K). - - K (input) INTEGER - If VECT = 'Q', the number of columns in the original M-by-K - matrix reduced by DGEBRD. - If VECT = 'P', the number of rows in the original K-by-N - matrix reduced by DGEBRD. - K >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the vectors which define the elementary reflectors, - as returned by DGEBRD. - On exit, the M-by-N matrix Q or P**T. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (input) DOUBLE PRECISION array, dimension - (min(M,K)) if VECT = 'Q' - (min(N,K)) if VECT = 'P' - TAU(i) must contain the scalar factor of the elementary - reflector H(i) or G(i), which determines Q or P**T, as - returned by DGEBRD in its array argument TAUQ or TAUP. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,min(M,N)). - For optimum performance LWORK >= min(M,N)*NB, where NB - is the optimal blocksize. 
- - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - wantq = lsame_(vect, "Q"); - mn = min(*m,*n); - lquery = *lwork == -1; - if (! wantq && ! lsame_(vect, "P")) { - *info = -1; - } else if (*m < 0) { - *info = -2; - } else if (((*n < 0) || (wantq && ((*n > *m) || (*n < min(*m,*k))))) || (! - wantq && ((*m > *n) || (*m < min(*n,*k))))) { - *info = -3; - } else if (*k < 0) { - *info = -4; - } else if (*lda < max(1,*m)) { - *info = -6; - } else if (*lwork < max(1,mn) && ! 
lquery) { - *info = -9; - } - - if (*info == 0) { - if (wantq) { - nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( - ftnlen)1); - } else { - nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( - ftnlen)1); - } - lwkopt = max(1,mn) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORGBR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if ((*m == 0) || (*n == 0)) { - work[1] = 1.; - return 0; - } - - if (wantq) { - -/* - Form Q, determined by a call to DGEBRD to reduce an m-by-k - matrix -*/ - - if (*m >= *k) { - -/* If m >= k, assume m >= n >= k */ - - dorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & - iinfo); - - } else { - -/* - If m < k, assume m = n - - Shift the vectors which define the elementary reflectors one - column to the right, and set the first row and column of Q - to those of the unit matrix -*/ - - for (j = *m; j >= 2; --j) { - a[j * a_dim1 + 1] = 0.; - i__1 = *m; - for (i__ = j + 1; i__ <= i__1; ++i__) { - a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; -/* L10: */ - } -/* L20: */ - } - a[a_dim1 + 1] = 1.; - i__1 = *m; - for (i__ = 2; i__ <= i__1; ++i__) { - a[i__ + a_dim1] = 0.; -/* L30: */ - } - if (*m > 1) { - -/* Form Q(2:m,2:m) */ - - i__1 = *m - 1; - i__2 = *m - 1; - i__3 = *m - 1; - dorgqr_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, & - tau[1], &work[1], lwork, &iinfo); - } - } - } else { - -/* - Form P', determined by a call to DGEBRD to reduce a k-by-n - matrix -*/ - - if (*k < *n) { - -/* If k < n, assume k <= m <= n */ - - dorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & - iinfo); - - } else { - -/* - If k >= n, assume m = n - - Shift the vectors which define the elementary reflectors one - row downward, and set the first row and column of P' to - those of the unit matrix -*/ - - a[a_dim1 + 1] = 1.; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - a[i__ + a_dim1] = 0.; -/* 
L40: */ - } - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - for (i__ = j - 1; i__ >= 2; --i__) { - a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1]; -/* L50: */ - } - a[j * a_dim1 + 1] = 0.; -/* L60: */ - } - if (*n > 1) { - -/* Form P'(2:n,2:n) */ - - i__1 = *n - 1; - i__2 = *n - 1; - i__3 = *n - 1; - dorglq_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, & - tau[1], &work[1], lwork, &iinfo); - } - } - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORGBR */ - -} /* dorgbr_ */ - -/* Subroutine */ int dorghr_(integer *n, integer *ilo, integer *ihi, - doublereal *a, integer *lda, doublereal *tau, doublereal *work, - integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, nb, nh, iinfo; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dorgqr_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORGHR generates a real orthogonal matrix Q which is defined as the - product of IHI-ILO elementary reflectors of order N, as returned by - DGEHRD: - - Q = H(ilo) H(ilo+1) . . . H(ihi-1). - - Arguments - ========= - - N (input) INTEGER - The order of the matrix Q. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - ILO and IHI must have the same values as in the previous call - of DGEHRD. Q is equal to the unit matrix except in the - submatrix Q(ilo+1:ihi,ilo+1:ihi). - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. 
- - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the vectors which define the elementary reflectors, - as returned by DGEHRD. - On exit, the N-by-N orthogonal matrix Q. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - TAU (input) DOUBLE PRECISION array, dimension (N-1) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEHRD. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= IHI-ILO. - For optimum performance LWORK >= (IHI-ILO)*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nh = *ihi - *ilo; - lquery = *lwork == -1; - if (*n < 0) { - *info = -1; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { - *info = -2; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < max(1,nh) && ! 
lquery) { - *info = -8; - } - - if (*info == 0) { - nb = ilaenv_(&c__1, "DORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( - ftnlen)1); - lwkopt = max(1,nh) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORGHR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - work[1] = 1.; - return 0; - } - -/* - Shift the vectors which define the elementary reflectors one - column to the right, and set the first ilo and the last n-ihi - rows and columns to those of the unit matrix -*/ - - i__1 = *ilo + 1; - for (j = *ihi; j >= i__1; --j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; -/* L10: */ - } - i__2 = *ihi; - for (i__ = j + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; -/* L20: */ - } - i__2 = *n; - for (i__ = *ihi + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } - i__1 = *ilo; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; -/* L50: */ - } - a[j + j * a_dim1] = 1.; -/* L60: */ - } - i__1 = *n; - for (j = *ihi + 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; -/* L70: */ - } - a[j + j * a_dim1] = 1.; -/* L80: */ - } - - if (nh > 0) { - -/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ - - dorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* - ilo], &work[1], lwork, &iinfo); - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORGHR */ - -} /* dorghr_ */ - -/* Subroutine */ int dorgl2_(integer *m, integer *n, integer *k, doublereal * - a, integer *lda, doublereal *tau, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal d__1; - - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer 
*), dlarf_(char *, integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORGL2 generates an m by n real matrix Q with orthonormal rows, - which is defined as the first m rows of a product of k elementary - reflectors of order n - - Q = H(k) . . . H(2) H(1) - - as returned by DGELQF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. N >= M. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. M >= K >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the i-th row must contain the vector which defines - the elementary reflector H(i), for i = 1,2,...,k, as returned - by DGELQF in the first k rows of its array argument A. - On exit, the m-by-n matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGELQF. 
- - WORK (workspace) DOUBLE PRECISION array, dimension (M) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < *m) { - *info = -2; - } else if ((*k < 0) || (*k > *m)) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORGL2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m <= 0) { - return 0; - } - - if (*k < *m) { - -/* Initialise rows k+1:m to rows of the unit matrix */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (l = *k + 1; l <= i__2; ++l) { - a[l + j * a_dim1] = 0.; -/* L10: */ - } - if (j > *k && j <= *m) { - a[j + j * a_dim1] = 1.; - } -/* L20: */ - } - } - - for (i__ = *k; i__ >= 1; --i__) { - -/* Apply H(i) to A(i:m,i:n) from the right */ - - if (i__ < *n) { - if (i__ < *m) { - a[i__ + i__ * a_dim1] = 1.; - i__1 = *m - i__; - i__2 = *n - i__ + 1; - dlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & - tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); - } - i__1 = *n - i__; - d__1 = -tau[i__]; - dscal_(&i__1, &d__1, &a[i__ + (i__ + 1) * a_dim1], lda); - } - a[i__ + i__ * a_dim1] = 1. 
- tau[i__]; - -/* Set A(i,1:i-1) to zero */ - - i__1 = i__ - 1; - for (l = 1; l <= i__1; ++l) { - a[i__ + l * a_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } - return 0; - -/* End of DORGL2 */ - -} /* dorgl2_ */ - -/* Subroutine */ int dorglq_(integer *m, integer *n, integer *k, doublereal * - a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int dorgl2_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), - dlarfb_(char *, char *, char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORGLQ generates an M-by-N real matrix Q with orthonormal rows, - which is defined as the first M rows of a product of K elementary - reflectors of order N - - Q = H(k) . . . H(2) H(1) - - as returned by DGELQF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. N >= M. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. M >= K >= 0. 
- - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the i-th row must contain the vector which defines - the elementary reflector H(i), for i = 1,2,...,k, as returned - by DGELQF in the first k rows of its array argument A. - On exit, the M-by-N matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGELQF. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,M). - For optimum performance LWORK >= M*NB, where NB is - the optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); - lwkopt = max(1,*m) * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < *m) { - *info = -2; - } else if ((*k < 0) || (*k > *m)) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*lwork < max(1,*m) && ! 
lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORGLQ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*m <= 0) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *m; - if (nb > 1 && nb < *k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "DORGLQ", " ", m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *m; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "DORGLQ", " ", m, n, k, &c_n1, - (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < *k && nx < *k) { - -/* - Use blocked code after the last block. - The first kk rows are handled by the block method. -*/ - - ki = (*k - nx - 1) / nb * nb; -/* Computing MIN */ - i__1 = *k, i__2 = ki + nb; - kk = min(i__1,i__2); - -/* Set A(kk+1:m,1:kk) to zero. */ - - i__1 = kk; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = kk + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - kk = 0; - } - -/* Use unblocked code for the last or only block. */ - - if (kk < *m) { - i__1 = *m - kk; - i__2 = *n - kk; - i__3 = *k - kk; - dorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & - tau[kk + 1], &work[1], &iinfo); - } - - if (kk > 0) { - -/* Use blocked code */ - - i__1 = -nb; - for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { -/* Computing MIN */ - i__2 = nb, i__3 = *k - i__ + 1; - ib = min(i__2,i__3); - if (i__ + ib <= *m) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . 
H(i+ib-1) -*/ - - i__2 = *n - i__ + 1; - dlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H' to A(i+ib:m,i:n) from the right */ - - i__2 = *m - i__ - ib + 1; - i__3 = *n - i__ + 1; - dlarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & - i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + - 1], &ldwork); - } - -/* Apply H' to columns i:n of current block */ - - i__2 = *n - i__ + 1; - dorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & - work[1], &iinfo); - -/* Set columns 1:i-1 of current block to zero */ - - i__2 = i__ - 1; - for (j = 1; j <= i__2; ++j) { - i__3 = i__ + ib - 1; - for (l = i__; l <= i__3; ++l) { - a[l + j * a_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } -/* L50: */ - } - } - - work[1] = (doublereal) iws; - return 0; - -/* End of DORGLQ */ - -} /* dorglq_ */ - -/* Subroutine */ int dorgqr_(integer *m, integer *n, integer *k, doublereal * - a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int dorg2r_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), - dlarfb_(char *, char *, char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORGQR generates an M-by-N real matrix Q with orthonormal columns, - which is defined as the first N columns of a product of K elementary - reflectors of order M - - Q = H(1) H(2) . . . H(k) - - as returned by DGEQRF. - - Arguments - ========= - - M (input) INTEGER - The number of rows of the matrix Q. M >= 0. - - N (input) INTEGER - The number of columns of the matrix Q. M >= N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines the - matrix Q. N >= K >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the i-th column must contain the vector which - defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by DGEQRF in the first k columns of its array - argument A. - On exit, the M-by-N matrix Q. - - LDA (input) INTEGER - The first dimension of the array A. LDA >= max(1,M). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQRF. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. 
- - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument has an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); - lwkopt = max(1,*n) * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if ((*n < 0) || (*n > *m)) { - *info = -2; - } else if ((*k < 0) || (*k > *n)) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORGQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - nx = 0; - iws = *n; - if (nb > 1 && nb < *k) { - -/* - Determine when to cross over from blocked to unblocked code. - - Computing MAX -*/ - i__1 = 0, i__2 = ilaenv_(&c__3, "DORGQR", " ", m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *k) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. -*/ - - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "DORGQR", " ", m, n, k, &c_n1, - (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < *k && nx < *k) { - -/* - Use blocked code after the last block. - The first kk columns are handled by the block method. -*/ - - ki = (*k - nx - 1) / nb * nb; -/* Computing MIN */ - i__1 = *k, i__2 = ki + nb; - kk = min(i__1,i__2); - -/* Set A(1:kk,kk+1:n) to zero. 
*/ - - i__1 = *n; - for (j = kk + 1; j <= i__1; ++j) { - i__2 = kk; - for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; -/* L10: */ - } -/* L20: */ - } - } else { - kk = 0; - } - -/* Use unblocked code for the last or only block. */ - - if (kk < *n) { - i__1 = *m - kk; - i__2 = *n - kk; - i__3 = *k - kk; - dorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & - tau[kk + 1], &work[1], &iinfo); - } - - if (kk > 0) { - -/* Use blocked code */ - - i__1 = -nb; - for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { -/* Computing MIN */ - i__2 = nb, i__3 = *k - i__ + 1; - ib = min(i__2,i__3); - if (i__ + ib <= *n) { - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) -*/ - - i__2 = *m - i__ + 1; - dlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); - -/* Apply H to A(i:m,i+ib:n) from the left */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__ - ib + 1; - dlarfb_("Left", "No transpose", "Forward", "Columnwise", & - i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ - 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & - work[ib + 1], &ldwork); - } - -/* Apply H to rows i:m of current block */ - - i__2 = *m - i__ + 1; - dorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & - work[1], &iinfo); - -/* Set rows 1:i-1 of current block to zero */ - - i__2 = i__ + ib - 1; - for (j = i__; j <= i__2; ++j) { - i__3 = i__ - 1; - for (l = 1; l <= i__3; ++l) { - a[l + j * a_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } -/* L50: */ - } - } - - work[1] = (doublereal) iws; - return 0; - -/* End of DORGQR */ - -} /* dorgqr_ */ - -/* Subroutine */ int dorm2l_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; - - /* Local variables */ - 
static integer i__, i1, i2, i3, mi, ni, nq; - static doublereal aii; - static logical left; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DORM2L overwrites the general real m by n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'T', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'T', - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'T': apply Q' (Transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - DGEQLF in the last k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). 
- - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQLF. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace) DOUBLE PRECISION array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if ((*k < 0) || (*k > nq)) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORM2L", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - return 0; - } - - if ((left && notran) || (! left && ! notran)) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - } else { - mi = *m; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) is applied to C(1:m-k+i,1:n) */ - - mi = *m - *k + i__; - } else { - -/* H(i) is applied to C(1:m,1:n-k+i) */ - - ni = *n - *k + i__; - } - -/* Apply H(i) */ - - aii = a[nq - *k + i__ + i__ * a_dim1]; - a[nq - *k + i__ + i__ * a_dim1] = 1.; - dlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ - c_offset], ldc, &work[1]); - a[nq - *k + i__ + i__ * a_dim1] = aii; -/* L10: */ - } - return 0; - -/* End of DORM2L */ - -} /* dorm2l_ */ - -/* Subroutine */ int dorm2r_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; - - /* Local variables */ - static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static doublereal aii; - static logical left; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DORM2R overwrites the general real m by n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'T', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'T', - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(1) H(2) . . . H(k) - - as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. 
- - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'T': apply Q' (Transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - DGEQRF in the first k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQRF. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). 
- - WORK (workspace) DOUBLE PRECISION array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if ((*k < 0) || (*k > nq)) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORM2R", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - return 0; - } - - if ((left && ! notran) || (! left && notran)) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H(i) is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H(i) */ - - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ - ic + jc * c_dim1], ldc, &work[1]); - a[i__ + i__ * a_dim1] = aii; -/* L10: */ - } - return 0; - -/* End of DORM2R */ - -} /* dorm2r_ */ - -/* Subroutine */ int dormbr_(char *vect, char *side, char *trans, integer *m, - integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, - doublereal *c__, integer *ldc, doublereal *work, integer *lwork, - integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i1, i2, nb, mi, ni, nq, nw; - static logical left; - extern logical lsame_(char *, char *); - static integer iinfo; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *); - static logical notran; - extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *); - static logical applyq; - static char transt[1]; - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C - with - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C - with - SIDE = 'L' SIDE = 'R' - TRANS = 'N': P * C C * P - TRANS = 'T': P**T * C C * P**T - - Here Q and P**T are the orthogonal matrices determined by DGEBRD when - reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and - P**T are defined as products of elementary reflectors H(i) and G(i) - respectively. - - Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the - order of the orthogonal matrix Q or P**T that is applied. - - If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: - if nq >= k, Q = H(1) H(2) . . . H(k); - if nq < k, Q = H(1) H(2) . . . H(nq-1). - - If VECT = 'P', A is assumed to have been a K-by-NQ matrix: - if k < nq, P = G(1) G(2) . . . G(k); - if k >= nq, P = G(1) G(2) . . . G(nq-1). - - Arguments - ========= - - VECT (input) CHARACTER*1 - = 'Q': apply Q or Q**T; - = 'P': apply P or P**T. - - SIDE (input) CHARACTER*1 - = 'L': apply Q, Q**T, P or P**T from the Left; - = 'R': apply Q, Q**T, P or P**T from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q or P; - = 'T': Transpose, apply Q**T or P**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - If VECT = 'Q', the number of columns in the original - matrix reduced by DGEBRD. - If VECT = 'P', the number of rows in the original - matrix reduced by DGEBRD. - K >= 0. 
- - A (input) DOUBLE PRECISION array, dimension - (LDA,min(nq,K)) if VECT = 'Q' - (LDA,nq) if VECT = 'P' - The vectors which define the elementary reflectors H(i) and - G(i), whose products determine the matrices Q and P, as - returned by DGEBRD. - - LDA (input) INTEGER - The leading dimension of the array A. - If VECT = 'Q', LDA >= max(1,nq); - if VECT = 'P', LDA >= max(1,min(nq,K)). - - TAU (input) DOUBLE PRECISION array, dimension (min(nq,K)) - TAU(i) must contain the scalar factor of the elementary - reflector H(i) or G(i) which determines Q or P, as returned - by DGEBRD in the array argument TAUQ or TAUP. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q - or P*C or P**T*C or C*P or C*P**T. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. 
- - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - applyq = lsame_(vect, "Q"); - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! applyq && ! lsame_(vect, "P")) { - *info = -1; - } else if (! left && ! lsame_(side, "R")) { - *info = -2; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -3; - } else if (*m < 0) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*k < 0) { - *info = -6; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = 1, i__2 = min(nq,*k); - if ((applyq && *lda < max(1,nq)) || (! applyq && *lda < max(i__1,i__2) - )) { - *info = -8; - } else if (*ldc < max(1,*m)) { - *info = -11; - } else if (*lwork < max(1,nw) && ! 
lquery) { - *info = -13; - } - } - - if (*info == 0) { - if (applyq) { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "DORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "DORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } - lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORMBR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - work[1] = 1.; - if ((*m == 0) || (*n == 0)) { - return 0; - } - - if (applyq) { - -/* Apply Q */ - - if (nq >= *k) { - -/* Q was determined by a call to DGEBRD with nq >= k */ - - dormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], lwork, &iinfo); - } else if (nq > 1) { - -/* Q was determined by a call to DGEBRD with nq < k */ - - if (left) { - mi = *m - 1; - ni = *n; - i1 = 2; - i2 = 1; - } else { - mi = *m; - ni = *n - 1; - i1 = 1; - i2 = 2; - } - i__1 = nq - 1; - dormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] - , &c__[i1 
+ i2 * c_dim1], ldc, &work[1], lwork, &iinfo); - } - } else { - -/* Apply P */ - - if (notran) { - *(unsigned char *)transt = 'T'; - } else { - *(unsigned char *)transt = 'N'; - } - if (nq > *k) { - -/* P was determined by a call to DGEBRD with nq > k */ - - dormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], lwork, &iinfo); - } else if (nq > 1) { - -/* P was determined by a call to DGEBRD with nq <= k */ - - if (left) { - mi = *m - 1; - ni = *n; - i1 = 2; - i2 = 1; - } else { - mi = *m; - ni = *n - 1; - i1 = 1; - i2 = 2; - } - i__1 = nq - 1; - dormlq_(side, transt, &mi, &ni, &i__1, &a[((a_dim1) << (1)) + 1], - lda, &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], - lwork, &iinfo); - } - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORMBR */ - -} /* dormbr_ */ - -/* Subroutine */ int dorml2_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; - - /* Local variables */ - static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static doublereal aii; - static logical left; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical notran; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DORML2 overwrites the general real m by n matrix C with - - Q * C if SIDE = 'L' and TRANS = 'N', or - - Q'* C if SIDE = 'L' and TRANS = 'T', or - - C * Q if SIDE = 'R' and TRANS = 'N', or - - C * Q' if SIDE = 'R' and TRANS = 'T', - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q' from the Left - = 'R': apply Q or Q' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply Q (No transpose) - = 'T': apply Q' (Transpose) - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) DOUBLE PRECISION array, dimension - (LDA,M) if SIDE = 'L', - (LDA,N) if SIDE = 'R' - The i-th row must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - DGELQF in the first k rows of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,K). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGELQF. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). 
- - WORK (workspace) DOUBLE PRECISION array, dimension - (N) if SIDE = 'L', - (M) if SIDE = 'R' - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - -/* NQ is the order of Q */ - - if (left) { - nq = *m; - } else { - nq = *n; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if ((*k < 0) || (*k > nq)) { - *info = -5; - } else if (*lda < max(1,*k)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORML2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - return 0; - } - - if ((left && notran) || (! left && ! notran)) { - i1 = 1; - i2 = *k; - i3 = 1; - } else { - i1 = *k; - i2 = 1; - i3 = -1; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { - if (left) { - -/* H(i) is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H(i) is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H(i) */ - - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ - ic + jc * c_dim1], ldc, &work[1]); - a[i__ + i__ * a_dim1] = aii; -/* L10: */ - } - return 0; - -/* End of DORML2 */ - -} /* dorml2_ */ - -/* Subroutine */ int dormlq_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static doublereal t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int dorml2_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *), dlarfb_(char - *, char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical notran; - static integer ldwork; - static char transt[1]; - static integer lwkopt; - static logical lquery; - - -/* 
- -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORMLQ overwrites the general real M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**T from the Left; - = 'R': apply Q or Q**T from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'T': Transpose, apply Q**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) DOUBLE PRECISION array, dimension - (LDA,M) if SIDE = 'L', - (LDA,N) if SIDE = 'R' - The i-th row must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - DGELQF in the first k rows of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,K). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGELQF. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). 
- - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if ((*k < 0) || (*k > nq)) { - *info = -5; - } else if (*lda < max(1,*k)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. 
- - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "DORMLQ", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORMLQ", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "DORMLQ", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if ((nb < nbmin) || (nb >= *k)) { - -/* Use unblocked code */ - - dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if ((left && notran) || (! left && ! notran)) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - if (notran) { - *(unsigned char *)transt = 'T'; - } else { - *(unsigned char *)transt = 'N'; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . 
H(i+ib-1) -*/ - - i__4 = nq - i__ + 1; - dlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], - lda, &tau[i__], t, &c__65); - if (left) { - -/* H or H' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H or H' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H or H' */ - - dlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ - + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], - ldc, &work[1], &ldwork); -/* L10: */ - } - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORMLQ */ - -} /* dormlq_ */ - -/* Subroutine */ int dormql_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static doublereal t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int dorm2l_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *), dlarfb_(char - *, char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical notran; - static integer 
ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORMQL overwrites the general real M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(k) . . . H(2) H(1) - - as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**T from the Left; - = 'R': apply Q or Q**T from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'T': Transpose, apply Q**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - DGEQLF in the last k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQLF. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). 
- - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if ((*k < 0) || (*k > nq)) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. 
- - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORMQL", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if ((nb < nbmin) || (nb >= *k)) { - -/* Use unblocked code */ - - dorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if ((left && notran) || (! left && ! notran)) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - } else { - mi = *m; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i+ib-1) . . . 
H(i+1) H(i) -*/ - - i__4 = nq - *k + i__ + ib - 1; - dlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] - , lda, &tau[i__], t, &c__65); - if (left) { - -/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ - - mi = *m - *k + i__ + ib - 1; - } else { - -/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ - - ni = *n - *k + i__ + ib - 1; - } - -/* Apply H or H' */ - - dlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ - i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & - work[1], &ldwork); -/* L10: */ - } - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORMQL */ - -} /* dormql_ */ - -/* Subroutine */ int dormqr_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, - i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i__; - static doublereal t[4160] /* was [65][64] */; - static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; - static logical left; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - extern /* Subroutine */ int dorm2r_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *), dlarfb_(char - *, char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static 
logical notran; - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORMQR overwrites the general real M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - where Q is a real orthogonal matrix defined as the product of k - elementary reflectors - - Q = H(1) H(2) . . . H(k) - - as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N - if SIDE = 'R'. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**T from the Left; - = 'R': apply Q or Q**T from the Right. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'T': Transpose, apply Q**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - K (input) INTEGER - The number of elementary reflectors whose product defines - the matrix Q. - If SIDE = 'L', M >= K >= 0; - if SIDE = 'R', N >= K >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,K) - The i-th column must contain the vector which defines the - elementary reflector H(i), for i = 1,2,...,k, as returned by - DGEQRF in the first k columns of its array argument A. - A is modified by the routine but restored on exit. - - LDA (input) INTEGER - The leading dimension of the array A. - If SIDE = 'L', LDA >= max(1,M); - if SIDE = 'R', LDA >= max(1,N). - - TAU (input) DOUBLE PRECISION array, dimension (K) - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQRF. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). 
- - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - notran = lsame_(trans, "N"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! notran && ! lsame_(trans, "T")) { - *info = -2; - } else if (*m < 0) { - *info = -3; - } else if (*n < 0) { - *info = -4; - } else if ((*k < 0) || (*k > nq)) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; - } - - if (*info == 0) { - -/* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. 
- - Computing MIN - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQR", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DORMQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - work[1] = 1.; - return 0; - } - - nbmin = 2; - ldwork = nw; - if (nb > 1 && nb < *k) { - iws = nw * nb; - if (*lwork < iws) { - nb = *lwork / ldwork; -/* - Computing MAX - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQR", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nbmin = max(i__1,i__2); - } - } else { - iws = nw; - } - - if ((nb < nbmin) || (nb >= *k)) { - -/* Use unblocked code */ - - dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ - c_offset], ldc, &work[1], &iinfo); - } else { - -/* Use blocked code */ - - if ((left && ! notran) || (! left && notran)) { - i1 = 1; - i2 = *k; - i3 = nb; - } else { - i1 = (*k - 1) / nb * nb + 1; - i2 = 1; - i3 = -nb; - } - - if (left) { - ni = *n; - jc = 1; - } else { - mi = *m; - ic = 1; - } - - i__1 = i2; - i__2 = i3; - for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *k - i__ + 1; - ib = min(i__4,i__5); - -/* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . 
H(i+ib-1) -*/ - - i__4 = nq - i__ + 1; - dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], t, &c__65) - ; - if (left) { - -/* H or H' is applied to C(i:m,1:n) */ - - mi = *m - i__ + 1; - ic = i__; - } else { - -/* H or H' is applied to C(1:m,i:n) */ - - ni = *n - i__ + 1; - jc = i__; - } - -/* Apply H or H' */ - - dlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ - i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * - c_dim1], ldc, &work[1], &ldwork); -/* L10: */ - } - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORMQR */ - -} /* dormqr_ */ - -/* Subroutine */ int dormtr_(char *side, char *uplo, char *trans, integer *m, - integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer i1, i2, nb, mi, ni, nq, nw; - static logical left; - extern logical lsame_(char *, char *); - static integer iinfo; - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dormql_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *), - dormqr_(char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *, integer *); - static integer lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DORMTR overwrites the general real M-by-N matrix C with - - SIDE = 'L' SIDE = 'R' - TRANS = 'N': Q * C C * Q - TRANS = 'T': Q**T * C C * Q**T - - where Q is a real orthogonal matrix of order nq, with nq = m if - SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of - nq-1 elementary reflectors, as returned by DSYTRD: - - if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); - - if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': apply Q or Q**T from the Left; - = 'R': apply Q or Q**T from the Right. - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A contains elementary reflectors - from DSYTRD; - = 'L': Lower triangle of A contains elementary reflectors - from DSYTRD. - - TRANS (input) CHARACTER*1 - = 'N': No transpose, apply Q; - = 'T': Transpose, apply Q**T. - - M (input) INTEGER - The number of rows of the matrix C. M >= 0. - - N (input) INTEGER - The number of columns of the matrix C. N >= 0. - - A (input) DOUBLE PRECISION array, dimension - (LDA,M) if SIDE = 'L' - (LDA,N) if SIDE = 'R' - The vectors which define the elementary reflectors, as - returned by DSYTRD. - - LDA (input) INTEGER - The leading dimension of the array A. - LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. - - TAU (input) DOUBLE PRECISION array, dimension - (M-1) if SIDE = 'L' - (N-1) if SIDE = 'R' - TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DSYTRD. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the M-by-N matrix C. - On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. - - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
- - LWORK (input) INTEGER - The dimension of the array WORK. - If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input arguments -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - left = lsame_(side, "L"); - upper = lsame_(uplo, "U"); - lquery = *lwork == -1; - -/* NQ is the order of Q and NW is the minimum dimension of WORK */ - - if (left) { - nq = *m; - nw = *n; - } else { - nq = *n; - nw = *m; - } - if (! left && ! lsame_(side, "R")) { - *info = -1; - } else if (! upper && ! lsame_(uplo, "L")) { - *info = -2; - } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T")) { - *info = -3; - } else if (*m < 0) { - *info = -4; - } else if (*n < 0) { - *info = -5; - } else if (*lda < max(1,nq)) { - *info = -7; - } else if (*ldc < max(1,*m)) { - *info = -10; - } else if (*lwork < max(1,nw) && ! 
lquery) { - *info = -12; - } - - if (*info == 0) { - if (upper) { - if (left) { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *m - 1; - i__3 = *m - 1; - nb = ilaenv_(&c__1, "DORMQL", ch__1, &i__2, n, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *n - 1; - i__3 = *n - 1; - nb = ilaenv_(&c__1, "DORMQL", ch__1, m, &i__2, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *m - 1; - i__3 = *m - 1; - nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__2, n, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { -/* Writing concatenation */ - i__1[0] = 1, a__1[0] = side; - i__1[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); - i__2 = *n - 1; - i__3 = *n - 1; - nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__2, &i__3, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } - lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__2 = -(*info); - xerbla_("DORMTR", &i__2); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (((*m == 0) || (*n == 0)) || (nq == 1)) { - work[1] = 1.; - return 0; - } - - if (left) { - mi = *m - 1; - ni = *n; - } else { - mi = *m; - ni = *n - 1; - } - - if (upper) { - -/* Q was determined by a call to DSYTRD with UPLO = 'U' */ - - i__2 = nq - 1; - dormql_(side, trans, &mi, &ni, &i__2, &a[((a_dim1) << (1)) + 1], lda, - &tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); - } else { - -/* Q was determined by a call to DSYTRD with UPLO = 'L' */ - - if (left) { - i1 = 2; - i2 = 1; - } else { - i1 = 1; - i2 = 2; - } - i__2 = nq - 1; - dormqr_(side, trans, &mi, &ni, &i__2, 
&a[a_dim1 + 2], lda, &tau[1], & - c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); - } - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DORMTR */ - -} /* dormtr_ */ - -/* Subroutine */ int dpotf2_(char *uplo, integer *n, doublereal *a, integer * - lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer j; - static doublereal ajj; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DPOTF2 computes the Cholesky factorization of a real symmetric - positive definite matrix A. - - The factorization has the form - A = U' * U , if UPLO = 'U', or - A = L * L', if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. - - This is the unblocked version of the algorithm, calling Level 2 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - symmetric matrix A is stored. - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the symmetric matrix A. 
If UPLO = 'U', the leading - n by n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U'*U or A = L*L'. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, the leading minor of order k is not - positive definite, and the factorization could not be - completed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DPOTF2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (upper) { - -/* Compute the Cholesky factorization A = U'*U. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute U(J,J) and test for non-positive-definiteness. */ - - i__2 = j - 1; - ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j * a_dim1 + 1], &c__1, - &a[j * a_dim1 + 1], &c__1); - if (ajj <= 0.) { - a[j + j * a_dim1] = ajj; - goto L30; - } - ajj = sqrt(ajj); - a[j + j * a_dim1] = ajj; - -/* Compute elements J+1:N of row J. 
*/ - - if (j < *n) { - i__2 = j - 1; - i__3 = *n - j; - dgemv_("Transpose", &i__2, &i__3, &c_b3001, &a[(j + 1) * - a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b2865, - &a[j + (j + 1) * a_dim1], lda); - i__2 = *n - j; - d__1 = 1. / ajj; - dscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Compute the Cholesky factorization A = L*L'. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute L(J,J) and test for non-positive-definiteness. */ - - i__2 = j - 1; - ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j + a_dim1], lda, &a[j - + a_dim1], lda); - if (ajj <= 0.) { - a[j + j * a_dim1] = ajj; - goto L30; - } - ajj = sqrt(ajj); - a[j + j * a_dim1] = ajj; - -/* Compute elements J+1:N of column J. */ - - if (j < *n) { - i__2 = *n - j; - i__3 = j - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[j + 1 + - a_dim1], lda, &a[j + a_dim1], lda, &c_b2865, &a[j + 1 - + j * a_dim1], &c__1); - i__2 = *n - j; - d__1 = 1. / ajj; - dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); - } -/* L20: */ - } - } - goto L40; - -L30: - *info = j; - -L40: - return 0; - -/* End of DPOTF2 */ - -} /* dpotf2_ */ - -/* Subroutine */ int dpotrf_(char *uplo, integer *n, doublereal *a, integer * - lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer j, jb, nb; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, doublereal *, - integer *), dpotf2_(char *, integer *, - 
doublereal *, integer *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DPOTRF computes the Cholesky factorization of a real symmetric - positive definite matrix A. - - The factorization has the form - A = U**T * U, if UPLO = 'U', or - A = L * L**T, if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. - - This is the block version of the algorithm, calling Level 3 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the symmetric matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U**T*U or A = L*L**T. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the leading minor of order i is not - positive definite, and the factorization could not be - completed. - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DPOTRF", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - if ((nb <= 1) || (nb >= *n)) { - -/* Use unblocked code. */ - - dpotf2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code. */ - - if (upper) { - -/* Compute the Cholesky factorization A = U'*U. */ - - i__1 = *n; - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b3001, &a[j * - a_dim1 + 1], lda, &c_b2865, &a[j + j * a_dim1], lda); - dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block row. */ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - dgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & - c_b3001, &a[j * a_dim1 + 1], lda, &a[(j + jb) * - a_dim1 + 1], lda, &c_b2865, &a[j + (j + jb) * - a_dim1], lda); - i__3 = *n - j - jb + 1; - dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & - i__3, &c_b2865, &a[j + j * a_dim1], lda, &a[j + ( - j + jb) * a_dim1], lda); - } -/* L10: */ - } - - } else { - -/* Compute the Cholesky factorization A = L*L'. */ - - i__2 = *n; - i__1 = nb; - for (j = 1; i__1 < 0 ? 
j >= i__2 : j <= i__2; j += i__1) { - -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - dsyrk_("Lower", "No transpose", &jb, &i__3, &c_b3001, &a[j + - a_dim1], lda, &c_b2865, &a[j + j * a_dim1], lda); - dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block column. */ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & - c_b3001, &a[j + jb + a_dim1], lda, &a[j + a_dim1], - lda, &c_b2865, &a[j + jb + j * a_dim1], lda); - i__3 = *n - j - jb + 1; - dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & - jb, &c_b2865, &a[j + j * a_dim1], lda, &a[j + jb - + j * a_dim1], lda); - } -/* L20: */ - } - } - } - goto L40; - -L30: - *info = *info + j - 1; - -L40: - return 0; - -/* End of DPOTRF */ - -} /* dpotrf_ */ - -/* Subroutine */ int dpotri_(char *uplo, integer *n, doublereal *a, integer * - lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *), dlauum_( - char *, integer *, doublereal *, integer *, integer *), - dtrtri_(char *, char *, integer *, doublereal *, integer *, - integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DPOTRI computes the inverse of a real symmetric positive definite - matrix A using the Cholesky factorization A = U**T*U or A = L*L**T - computed by DPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. 
N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the triangular factor U or L from the Cholesky - factorization A = U**T*U or A = L*L**T, as computed by - DPOTRF. - On exit, the upper or lower triangle of the (symmetric) - inverse of A, overwriting the input factor U or L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the (i,i) element of the factor U or L is - zero, and the inverse could not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DPOTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Invert the triangular Cholesky factor U or L. */ - - dtrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); - if (*info > 0) { - return 0; - } - -/* Form inv(U)*inv(U)' or inv(L)'*inv(L). 
*/ - - dlauum_(uplo, n, &a[a_offset], lda, info); - - return 0; - -/* End of DPOTRI */ - -} /* dpotri_ */ - -/* Subroutine */ int dpotrs_(char *uplo, integer *n, integer *nrhs, - doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * - info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DPOTRS solves a system of linear equations A*X = B with a symmetric - positive definite matrix A using the Cholesky factorization - A = U**T*U or A = L*L**T computed by DPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The triangular factor U or L from the Cholesky factorization - A = U**T*U or A = L*L**T, as computed by DPOTRF. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). 
- - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DPOTRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if ((*n == 0) || (*nrhs == 0)) { - return 0; - } - - if (upper) { - -/* - Solve A*X = B where A = U'*U. - - Solve U'*X = B, overwriting B with X. -*/ - - dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b2865, & - a[a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. */ - - dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b2865, - &a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A*X = B where A = L*L'. - - Solve L*X = B, overwriting B with X. -*/ - - dtrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b2865, - &a[a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. 
*/ - - dtrsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b2865, & - a[a_offset], lda, &b[b_offset], ldb); - } - - return 0; - -/* End of DPOTRS */ - -} /* dpotrs_ */ - -/* Subroutine */ int dstedc_(char *compz, integer *n, doublereal *d__, - doublereal *e, doublereal *z__, integer *ldz, doublereal *work, - integer *lwork, integer *iwork, integer *liwork, integer *info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2; - doublereal d__1, d__2; - - /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j, k, m; - static doublereal p; - static integer ii, end, lgn; - static doublereal eps, tiny; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer lwmin; - extern /* Subroutine */ int dlaed0_(integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *, integer *); - static integer start; - - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlacpy_(char *, integer *, integer - *, doublereal *, integer *, doublereal *, integer *), - dlaset_(char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *); - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, - 
integer *), dlasrt_(char *, integer *, doublereal *, integer *); - static integer liwmin, icompz; - extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *); - static doublereal orgnrm; - static logical lquery; - static integer smlsiz, dtrtrw, storez; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DSTEDC computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the divide and conquer method. - The eigenvectors of a full or band real symmetric matrix can also be - found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this - matrix to tridiagonal form. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See DLAED3 for details. - - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'I': Compute eigenvectors of tridiagonal matrix also. - = 'V': Compute eigenvectors of original dense symmetric - matrix also. On entry, Z contains the orthogonal - matrix used to reduce the original matrix to - tridiagonal form. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) DOUBLE PRECISION array, dimension (N-1) - On entry, the subdiagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. 
- - Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) - On entry, if COMPZ = 'V', then Z contains the orthogonal - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original symmetric matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1. - If eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace/output) DOUBLE PRECISION array, - dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. - If COMPZ = 'V' and N > 1 then LWORK must be at least - ( 1 + 3*N + 2*N*lg N + 3*N**2 ), - where lg( N ) = smallest integer k such - that 2**k >= N. - If COMPZ = 'I' and N > 1 then LWORK must be at least - ( 1 + 4*N + N**2 ). - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - - LIWORK (input) INTEGER - The dimension of the array IWORK. - If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. - If COMPZ = 'V' and N > 1 then LIWORK must be at least - ( 6 + 6*N + 5*N*lg N ). - If COMPZ = 'I' and N > 1 then LIWORK must be at least - ( 3 + 5*N ). - - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit. 
- < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - --iwork; - - /* Function Body */ - *info = 0; - lquery = (*lwork == -1) || (*liwork == -1); - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if ((*n <= 1) || (icompz <= 0)) { - liwmin = 1; - lwmin = 1; - } else { - lgn = (integer) (log((doublereal) (*n)) / log(2.)); - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (icompz == 1) { -/* Computing 2nd power */ - i__1 = *n; - lwmin = *n * 3 + 1 + ((*n) << (1)) * lgn + i__1 * i__1 * 3; - liwmin = *n * 6 + 6 + *n * 5 * lgn; - } else if (icompz == 2) { -/* Computing 2nd power */ - i__1 = *n; - lwmin = ((*n) << (2)) + 1 + i__1 * i__1; - liwmin = *n * 5 + 3; - } - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) { - *info = -6; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*liwork < liwmin && ! 
lquery) { - *info = -10; - } - - if (*info == 0) { - work[1] = (doublereal) lwmin; - iwork[1] = liwmin; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DSTEDC", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*n == 1) { - if (icompz != 0) { - z__[z_dim1 + 1] = 1.; - } - return 0; - } - - smlsiz = ilaenv_(&c__9, "DSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - If the following conditional clause is removed, then the routine - will use the Divide and Conquer routine to compute only the - eigenvalues, which requires (3N + 3N**2) real workspace and - (2 + 5N + 2N lg(N)) integer workspace. - Since on many architectures DSTERF is much faster than any other - algorithm for finding eigenvalues only, it is used here - as the default. - - If COMPZ = 'N', use DSTERF to compute the eigenvalues. -*/ - - if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); - return 0; - } - -/* - If N is smaller than the minimum divide size (SMLSIZ+1), then - solve the problem with another solver. -*/ - - if (*n <= smlsiz) { - if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); - return 0; - } else if (icompz == 2) { - dsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], - info); - return 0; - } else { - dsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], - info); - return 0; - } - } - -/* - If COMPZ = 'V', the Z matrix must be stored elsewhere for later - use. -*/ - - if (icompz == 1) { - storez = *n * *n + 1; - } else { - storez = 1; - } - - if (icompz == 2) { - dlaset_("Full", n, n, &c_b2879, &c_b2865, &z__[z_offset], ldz); - } - -/* Scale. */ - - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { - return 0; - } - - eps = EPSILON; - - start = 1; - -/* while ( START <= N ) */ - -L10: - if (start <= *n) { - -/* - Let END be the position of the next subdiagonal entry such that - E( END ) <= TINY or END = N if no such subdiagonal exists. 
The - matrix identified by the elements between START and END - constitutes an independent sub-problem. -*/ - - end = start; -L20: - if (end < *n) { - tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = - d__[end + 1], abs(d__2))); - if ((d__1 = e[end], abs(d__1)) > tiny) { - ++end; - goto L20; - } - } - -/* (Sub) Problem determined. Compute its size and solve it. */ - - m = end - start + 1; - if (m == 1) { - start = end + 1; - goto L10; - } - if (m > smlsiz) { - *info = smlsiz; - -/* Scale. */ - - orgnrm = dlanst_("M", &m, &d__[start], &e[start]); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, &m, &c__1, &d__[ - start], &m, info); - i__1 = m - 1; - i__2 = m - 1; - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, &i__1, &c__1, &e[ - start], &i__2, info); - - if (icompz == 1) { - dtrtrw = 1; - } else { - dtrtrw = start; - } - dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[dtrtrw + - start * z_dim1], ldz, &work[1], n, &work[storez], &iwork[ - 1], info); - if (*info != 0) { - *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m - + 1) + start - 1; - return 0; - } - -/* Scale back. */ - - dlascl_("G", &c__0, &c__0, &c_b2865, &orgnrm, &m, &c__1, &d__[ - start], &m, info); - - } else { - if (icompz == 1) { - -/* - Since QR won't update a Z matrix which is larger than the - length of D, we must solve the sub-problem in a workspace and - then multiply back into Z. 
-*/ - - dsteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ - m * m + 1], info); - dlacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ - storez], n); - dgemm_("N", "N", n, &m, &m, &c_b2865, &work[storez], ldz, & - work[1], &m, &c_b2879, &z__[start * z_dim1 + 1], ldz); - } else if (icompz == 2) { - dsteqr_("I", &m, &d__[start], &e[start], &z__[start + start * - z_dim1], ldz, &work[1], info); - } else { - dsterf_(&m, &d__[start], &e[start], info); - } - if (*info != 0) { - *info = start * (*n + 1) + end; - return 0; - } - } - - start = end + 1; - goto L10; - } - -/* - endwhile - - If the problem split any number of times, then the eigenvalues - will not be properly ordered. Here we permute the eigenvalues - (and the associated eigenvectors) into ascending order. -*/ - - if (m != *n) { - if (icompz == 0) { - -/* Use Quick Sort */ - - dlasrt_("I", n, &d__[1], info); - - } else { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L30: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 - + 1], &c__1); - } -/* L40: */ - } - } - } - - work[1] = (doublereal) lwmin; - iwork[1] = liwmin; - - return 0; - -/* End of DSTEDC */ - -} /* dstedc_ */ - -/* Subroutine */ int dsteqr_(char *compz, integer *n, doublereal *d__, - doublereal *e, doublereal *z__, integer *ldz, doublereal *work, - integer *info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static doublereal b, c__, f, g; - static integer i__, j, k, l, m; - static doublereal p, r__, s; - static integer l1, ii, mm, lm1, mm1, nm1; - static doublereal rt1, rt2, eps; - static 
integer lsv; - static doublereal tst, eps2; - static integer lend, jtot; - extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal - *, doublereal *, doublereal *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *); - static doublereal anorm; - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *), dlaev2_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *); - static integer lendm1, lendp1; - - static integer iscale; - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlaset_(char *, integer *, integer - *, doublereal *, doublereal *, doublereal *, integer *); - static doublereal safmin; - extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *); - static doublereal safmax; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); - static integer lendsv; - static doublereal ssfmin; - static integer nmaxit, icompz; - static doublereal ssfmax; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - DSTEQR computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the implicit QL or QR method. - The eigenvectors of a full or band symmetric matrix can also be found - if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to - tridiagonal form. 
- - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'V': Compute eigenvalues and eigenvectors of the original - symmetric matrix. On entry, Z must contain the - orthogonal matrix used to reduce the original matrix - to tridiagonal form. - = 'I': Compute eigenvalues and eigenvectors of the - tridiagonal matrix. Z is initialized to the identity - matrix. - - N (input) INTEGER - The order of the matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) DOUBLE PRECISION array, dimension (N-1) - On entry, the (n-1) subdiagonal elements of the tridiagonal - matrix. - On exit, E has been destroyed. - - Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) - On entry, if COMPZ = 'V', then Z contains the orthogonal - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original symmetric matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1, and if - eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) - If COMPZ = 'N', then WORK is not referenced. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm has failed to find all the eigenvalues in - a total of 30*N iterations; if INFO = i, then i - elements of E have not converged to zero; on exit, D - and E contain the elements of a symmetric tridiagonal - matrix which is orthogonally similar to the original - matrix. - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - - /* Function Body */ - *info = 0; - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) { - *info = -6; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DSTEQR", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (*n == 1) { - if (icompz == 2) { - z__[z_dim1 + 1] = 1.; - } - return 0; - } - -/* Determine the unit roundoff and over/underflow thresholds. */ - - eps = EPSILON; -/* Computing 2nd power */ - d__1 = eps; - eps2 = d__1 * d__1; - safmin = SAFEMINIMUM; - safmax = 1. / safmin; - ssfmax = sqrt(safmax) / 3.; - ssfmin = sqrt(safmin) / eps2; - -/* - Compute the eigenvalues and eigenvectors of the tridiagonal - matrix. -*/ - - if (icompz == 2) { - dlaset_("Full", n, n, &c_b2879, &c_b2865, &z__[z_offset], ldz); - } - - nmaxit = *n * 30; - jtot = 0; - -/* - Determine where the matrix splits and choose QL or QR iteration - for each block, according to whether top or bottom diagonal - element is smaller. -*/ - - l1 = 1; - nm1 = *n - 1; - -L10: - if (l1 > *n) { - goto L160; - } - if (l1 > 1) { - e[l1 - 1] = 0.; - } - if (l1 <= nm1) { - i__1 = nm1; - for (m = l1; m <= i__1; ++m) { - tst = (d__1 = e[m], abs(d__1)); - if (tst == 0.) 
{ - goto L30; - } - if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m - + 1], abs(d__2))) * eps) { - e[m] = 0.; - goto L30; - } -/* L20: */ - } - } - m = *n; - -L30: - l = l1; - lsv = l; - lend = m; - lendsv = lend; - l1 = m + 1; - if (lend == l) { - goto L10; - } - -/* Scale submatrix in rows and columns L to LEND */ - - i__1 = lend - l + 1; - anorm = dlanst_("I", &i__1, &d__[l], &e[l]); - iscale = 0; - if (anorm == 0.) { - goto L10; - } - if (anorm > ssfmax) { - iscale = 1; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, - info); - } else if (anorm < ssfmin) { - iscale = 2; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, - info); - } - -/* Choose between QL and QR iteration */ - - if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { - lend = lsv; - l = lendsv; - } - - if (lend > l) { - -/* - QL Iteration - - Look for small subdiagonal element. -*/ - -L40: - if (l != lend) { - lendm1 = lend - 1; - i__1 = lendm1; - for (m = l; m <= i__1; ++m) { -/* Computing 2nd power */ - d__2 = (d__1 = e[m], abs(d__1)); - tst = d__2 * d__2; - if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m - + 1], abs(d__2)) + safmin) { - goto L60; - } -/* L50: */ - } - } - - m = lend; - -L60: - if (m < lend) { - e[m] = 0.; - } - p = d__[l]; - if (m == l) { - goto L80; - } - -/* - If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 - to compute its eigensystem. 
-*/ - - if (m == l + 1) { - if (icompz > 0) { - dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); - work[l] = c__; - work[*n - 1 + l] = s; - dlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & - z__[l * z_dim1 + 1], ldz); - } else { - dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); - } - d__[l] = rt1; - d__[l + 1] = rt2; - e[l] = 0.; - l += 2; - if (l <= lend) { - goto L40; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l + 1] - p) / (e[l] * 2.); - r__ = dlapy2_(&g, &c_b2865); - g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); - - s = 1.; - c__ = 1.; - p = 0.; - -/* Inner loop */ - - mm1 = m - 1; - i__1 = l; - for (i__ = mm1; i__ >= i__1; --i__) { - f = s * e[i__]; - b = c__ * e[i__]; - dlartg_(&g, &f, &c__, &s, &r__); - if (i__ != m - 1) { - e[i__ + 1] = r__; - } - g = d__[i__ + 1] - p; - r__ = (d__[i__] - g) * s + c__ * 2. * b; - p = s * r__; - d__[i__ + 1] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = -s; - } - -/* L70: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = m - l + 1; - dlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[l] = g; - goto L40; - -/* Eigenvalue found. */ - -L80: - d__[l] = p; - - ++l; - if (l <= lend) { - goto L40; - } - goto L140; - - } else { - -/* - QR Iteration - - Look for small superdiagonal element. 
-*/ - -L90: - if (l != lend) { - lendp1 = lend + 1; - i__1 = lendp1; - for (m = l; m >= i__1; --m) { -/* Computing 2nd power */ - d__2 = (d__1 = e[m - 1], abs(d__1)); - tst = d__2 * d__2; - if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m - - 1], abs(d__2)) + safmin) { - goto L110; - } -/* L100: */ - } - } - - m = lend; - -L110: - if (m > lend) { - e[m - 1] = 0.; - } - p = d__[l]; - if (m == l) { - goto L130; - } - -/* - If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 - to compute its eigensystem. -*/ - - if (m == l - 1) { - if (icompz > 0) { - dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) - ; - work[m] = c__; - work[*n - 1 + m] = s; - dlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & - z__[(l - 1) * z_dim1 + 1], ldz); - } else { - dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); - } - d__[l - 1] = rt1; - d__[l] = rt2; - e[l - 1] = 0.; - l += -2; - if (l >= lend) { - goto L90; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l - 1] - p) / (e[l - 1] * 2.); - r__ = dlapy2_(&g, &c_b2865); - g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); - - s = 1.; - c__ = 1.; - p = 0.; - -/* Inner loop */ - - lm1 = l - 1; - i__1 = lm1; - for (i__ = m; i__ <= i__1; ++i__) { - f = s * e[i__]; - b = c__ * e[i__]; - dlartg_(&g, &f, &c__, &s, &r__); - if (i__ != m) { - e[i__ - 1] = r__; - } - g = d__[i__] - p; - r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; - p = s * r__; - d__[i__] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = s; - } - -/* L120: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = l - m + 1; - dlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[lm1] = g; - goto L90; - -/* Eigenvalue found. 
*/ - -L130: - d__[l] = p; - - --l; - if (l >= lend) { - goto L90; - } - goto L140; - - } - -/* Undo scaling if necessary */ - -L140: - if (iscale == 1) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } else if (iscale == 2) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } - -/* - Check for no convergence to an eigenvalue after a total - of N*MAXIT iterations. -*/ - - if (jtot < nmaxit) { - goto L10; - } - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.) { - ++(*info); - } -/* L150: */ - } - goto L190; - -/* Order eigenvalues and eigenvectors. */ - -L160: - if (icompz == 0) { - -/* Use Quick Sort */ - - dlasrt_("I", n, &d__[1], info); - - } else { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L170: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } -/* L180: */ - } - } - -L190: - return 0; - -/* End of DSTEQR */ - -} /* dsteqr_ */ - -/* Subroutine */ int dsterf_(integer *n, doublereal *d__, doublereal *e, - integer *info) -{ - /* System generated locals */ - integer i__1; - doublereal d__1, d__2, d__3; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static doublereal c__; - static integer i__, l, m; - static doublereal p, r__, s; - static integer l1; - static doublereal bb, rt1, rt2, eps, rte; - static integer lsv; - static doublereal eps2, oldc; - 
static integer lend, jtot; - extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal - *, doublereal *, doublereal *); - static doublereal gamma, alpha, sigma, anorm; - - static integer iscale; - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *); - static doublereal oldgam, safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal safmax; - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); - static integer lendsv; - static doublereal ssfmin; - static integer nmaxit; - static doublereal ssfmax; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DSTERF computes all eigenvalues of a symmetric tridiagonal matrix - using the Pal-Walker-Kahan variant of the QL or QR algorithm. - - Arguments - ========= - - N (input) INTEGER - The order of the matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the n diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) DOUBLE PRECISION array, dimension (N-1) - On entry, the (n-1) subdiagonal elements of the tridiagonal - matrix. - On exit, E has been destroyed. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm failed to find all of the eigenvalues in - a total of 30*N iterations; if INFO = i, then i - elements of E have not converged to zero. - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --e; - --d__; - - /* Function Body */ - *info = 0; - -/* Quick return if possible */ - - if (*n < 0) { - *info = -1; - i__1 = -(*info); - xerbla_("DSTERF", &i__1); - return 0; - } - if (*n <= 1) { - return 0; - } - -/* Determine the unit roundoff for this environment. */ - - eps = EPSILON; -/* Computing 2nd power */ - d__1 = eps; - eps2 = d__1 * d__1; - safmin = SAFEMINIMUM; - safmax = 1. / safmin; - ssfmax = sqrt(safmax) / 3.; - ssfmin = sqrt(safmin) / eps2; - -/* Compute the eigenvalues of the tridiagonal matrix. */ - - nmaxit = *n * 30; - sigma = 0.; - jtot = 0; - -/* - Determine where the matrix splits and choose QL or QR iteration - for each block, according to whether top or bottom diagonal - element is smaller. -*/ - - l1 = 1; - -L10: - if (l1 > *n) { - goto L170; - } - if (l1 > 1) { - e[l1 - 1] = 0.; - } - i__1 = *n - 1; - for (m = l1; m <= i__1; ++m) { - if ((d__3 = e[m], abs(d__3)) <= sqrt((d__1 = d__[m], abs(d__1))) * - sqrt((d__2 = d__[m + 1], abs(d__2))) * eps) { - e[m] = 0.; - goto L30; - } -/* L20: */ - } - m = *n; - -L30: - l = l1; - lsv = l; - lend = m; - lendsv = lend; - l1 = m + 1; - if (lend == l) { - goto L10; - } - -/* Scale submatrix in rows and columns L to LEND */ - - i__1 = lend - l + 1; - anorm = dlanst_("I", &i__1, &d__[l], &e[l]); - iscale = 0; - if (anorm > ssfmax) { - iscale = 1; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, - info); - } else if (anorm < ssfmin) { - iscale = 2; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, - info); - } - - i__1 = lend - 1; - for (i__ = l; i__ <= i__1; ++i__) { -/* Computing 2nd power */ - d__1 = e[i__]; - e[i__] = d__1 * d__1; -/* L40: */ - } - -/* Choose between QL and QR 
iteration */ - - if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { - lend = lsv; - l = lendsv; - } - - if (lend >= l) { - -/* - QL Iteration - - Look for small subdiagonal element. -*/ - -L50: - if (l != lend) { - i__1 = lend - 1; - for (m = l; m <= i__1; ++m) { - if ((d__2 = e[m], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m - + 1], abs(d__1))) { - goto L70; - } -/* L60: */ - } - } - m = lend; - -L70: - if (m < lend) { - e[m] = 0.; - } - p = d__[l]; - if (m == l) { - goto L90; - } - -/* - If remaining matrix is 2 by 2, use DLAE2 to compute its - eigenvalues. -*/ - - if (m == l + 1) { - rte = sqrt(e[l]); - dlae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); - d__[l] = rt1; - d__[l + 1] = rt2; - e[l] = 0.; - l += 2; - if (l <= lend) { - goto L50; - } - goto L150; - } - - if (jtot == nmaxit) { - goto L150; - } - ++jtot; - -/* Form shift. */ - - rte = sqrt(e[l]); - sigma = (d__[l + 1] - p) / (rte * 2.); - r__ = dlapy2_(&sigma, &c_b2865); - sigma = p - rte / (sigma + d_sign(&r__, &sigma)); - - c__ = 1.; - s = 0.; - gamma = d__[m] - sigma; - p = gamma * gamma; - -/* Inner loop */ - - i__1 = l; - for (i__ = m - 1; i__ >= i__1; --i__) { - bb = e[i__]; - r__ = p + bb; - if (i__ != m - 1) { - e[i__ + 1] = s * r__; - } - oldc = c__; - c__ = p / r__; - s = bb / r__; - oldgam = gamma; - alpha = d__[i__]; - gamma = c__ * (alpha - sigma) - s * oldgam; - d__[i__ + 1] = oldgam + (alpha - gamma); - if (c__ != 0.) { - p = gamma * gamma / c__; - } else { - p = oldc * bb; - } -/* L80: */ - } - - e[l] = s * p; - d__[l] = sigma + gamma; - goto L50; - -/* Eigenvalue found. */ - -L90: - d__[l] = p; - - ++l; - if (l <= lend) { - goto L50; - } - goto L150; - - } else { - -/* - QR Iteration - - Look for small superdiagonal element. 
-*/ - -L100: - i__1 = lend + 1; - for (m = l; m >= i__1; --m) { - if ((d__2 = e[m - 1], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m - - 1], abs(d__1))) { - goto L120; - } -/* L110: */ - } - m = lend; - -L120: - if (m > lend) { - e[m - 1] = 0.; - } - p = d__[l]; - if (m == l) { - goto L140; - } - -/* - If remaining matrix is 2 by 2, use DLAE2 to compute its - eigenvalues. -*/ - - if (m == l - 1) { - rte = sqrt(e[l - 1]); - dlae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); - d__[l] = rt1; - d__[l - 1] = rt2; - e[l - 1] = 0.; - l += -2; - if (l >= lend) { - goto L100; - } - goto L150; - } - - if (jtot == nmaxit) { - goto L150; - } - ++jtot; - -/* Form shift. */ - - rte = sqrt(e[l - 1]); - sigma = (d__[l - 1] - p) / (rte * 2.); - r__ = dlapy2_(&sigma, &c_b2865); - sigma = p - rte / (sigma + d_sign(&r__, &sigma)); - - c__ = 1.; - s = 0.; - gamma = d__[m] - sigma; - p = gamma * gamma; - -/* Inner loop */ - - i__1 = l - 1; - for (i__ = m; i__ <= i__1; ++i__) { - bb = e[i__]; - r__ = p + bb; - if (i__ != m) { - e[i__ - 1] = s * r__; - } - oldc = c__; - c__ = p / r__; - s = bb / r__; - oldgam = gamma; - alpha = d__[i__ + 1]; - gamma = c__ * (alpha - sigma) - s * oldgam; - d__[i__] = oldgam + (alpha - gamma); - if (c__ != 0.) { - p = gamma * gamma / c__; - } else { - p = oldc * bb; - } -/* L130: */ - } - - e[l - 1] = s * p; - d__[l] = sigma + gamma; - goto L100; - -/* Eigenvalue found. */ - -L140: - d__[l] = p; - - --l; - if (l >= lend) { - goto L100; - } - goto L150; - - } - -/* Undo scaling if necessary */ - -L150: - if (iscale == 1) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - } - if (iscale == 2) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - } - -/* - Check for no convergence to an eigenvalue after a total - of N*MAXIT iterations. 
-*/ - - if (jtot < nmaxit) { - goto L10; - } - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.) { - ++(*info); - } -/* L160: */ - } - goto L180; - -/* Sort eigenvalues in increasing order. */ - -L170: - dlasrt_("I", n, &d__[1], info); - -L180: - return 0; - -/* End of DSTERF */ - -} /* dsterf_ */ - -/* Subroutine */ int dsyevd_(char *jobz, char *uplo, integer *n, doublereal * - a, integer *lda, doublereal *w, doublereal *work, integer *lwork, - integer *iwork, integer *liwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static doublereal eps; - static integer inde; - static doublereal anrm, rmin, rmax; - static integer lopt; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - static doublereal sigma; - extern logical lsame_(char *, char *); - static integer iinfo, lwmin, liopt; - static logical lower, wantz; - static integer indwk2, llwrk2; - - static integer iscale; - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dstedc_(char *, integer *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, integer *), dlacpy_( - char *, integer *, integer *, doublereal *, integer *, doublereal - *, integer *); - static doublereal safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal bignum; - static integer indtau; - extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, - integer *); - extern doublereal dlansy_(char *, char *, integer *, doublereal *, - integer *, doublereal *); - static integer indwrk, liwmin; - extern /* Subroutine */ int dormtr_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - 
integer *, doublereal *, integer *, integer *), dsytrd_(char *, integer *, doublereal *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *); - static integer llwork; - static doublereal smlnum; - static logical lquery; - - -/* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DSYEVD computes all eigenvalues and, optionally, eigenvectors of a - real symmetric matrix A. If eigenvectors are desired, it uses a - divide and conquer algorithm. - - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. - - Because of large use of BLAS of level 3, DSYEVD needs N**2 more - workspace than DSYEVX. - - Arguments - ========= - - JOBZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only; - = 'V': Compute eigenvalues and eigenvectors. - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA, N) - On entry, the symmetric matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of A contains the - upper triangular part of the matrix A. If UPLO = 'L', - the leading N-by-N lower triangular part of A contains - the lower triangular part of the matrix A. - On exit, if JOBZ = 'V', then if INFO = 0, A contains the - orthonormal eigenvectors of the matrix A. 
- If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') - or the upper triangle (if UPLO='U') of A, including the - diagonal, is destroyed. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - W (output) DOUBLE PRECISION array, dimension (N) - If INFO = 0, the eigenvalues in ascending order. - - WORK (workspace/output) DOUBLE PRECISION array, - dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If N <= 1, LWORK must be at least 1. - If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. - If JOBZ = 'V' and N > 1, LWORK must be at least - 1 + 6*N + 2*N**2. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - - LIWORK (input) INTEGER - The dimension of the array IWORK. - If N <= 1, LIWORK must be at least 1. - If JOBZ = 'N' and N > 1, LIWORK must be at least 1. - If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. - - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the algorithm failed to converge; i - off-diagonal elements of an intermediate tridiagonal - form did not converge to zero. - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. 
- - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --w; - --work; - --iwork; - - /* Function Body */ - wantz = lsame_(jobz, "V"); - lower = lsame_(uplo, "L"); - lquery = (*lwork == -1) || (*liwork == -1); - - *info = 0; - if (*n <= 1) { - liwmin = 1; - lwmin = 1; - lopt = lwmin; - liopt = liwmin; - } else { - if (wantz) { - liwmin = *n * 5 + 3; -/* Computing 2nd power */ - i__1 = *n; - lwmin = *n * 6 + 1 + ((i__1 * i__1) << (1)); - } else { - liwmin = 1; - lwmin = ((*n) << (1)) + 1; - } - lopt = lwmin; - liopt = liwmin; - } - if (! ((wantz) || (lsame_(jobz, "N")))) { - *info = -1; - } else if (! ((lower) || (lsame_(uplo, "U")))) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*liwork < liwmin && ! lquery) { - *info = -10; - } - - if (*info == 0) { - work[1] = (doublereal) lopt; - iwork[1] = liopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DSYEVD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (*n == 1) { - w[1] = a[a_dim1 + 1]; - if (wantz) { - a[a_dim1 + 1] = 1.; - } - return 0; - } - -/* Get machine constants. */ - - safmin = SAFEMINIMUM; - eps = PRECISION; - smlnum = safmin / eps; - bignum = 1. / smlnum; - rmin = sqrt(smlnum); - rmax = sqrt(bignum); - -/* Scale matrix to allowable range, if necessary. */ - - anrm = dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); - iscale = 0; - if (anrm > 0. && anrm < rmin) { - iscale = 1; - sigma = rmin / anrm; - } else if (anrm > rmax) { - iscale = 1; - sigma = rmax / anrm; - } - if (iscale == 1) { - dlascl_(uplo, &c__0, &c__0, &c_b2865, &sigma, n, n, &a[a_offset], lda, - info); - } - -/* Call DSYTRD to reduce symmetric matrix to tridiagonal form. 
*/ - - inde = 1; - indtau = inde + *n; - indwrk = indtau + *n; - llwork = *lwork - indwrk + 1; - indwk2 = indwrk + *n * *n; - llwrk2 = *lwork - indwk2 + 1; - - dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & - work[indwrk], &llwork, &iinfo); - lopt = (integer) (((*n) << (1)) + work[indwrk]); - -/* - For eigenvalues only, call DSTERF. For eigenvectors, first call - DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the - tridiagonal matrix, then call DORMTR to multiply it by the - Householder transformations stored in A. -*/ - - if (! wantz) { - dsterf_(n, &w[1], &work[inde], info); - } else { - dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & - llwrk2, &iwork[1], liwork, info); - dormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ - indwrk], n, &work[indwk2], &llwrk2, &iinfo); - dlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); -/* - Computing MAX - Computing 2nd power -*/ - i__3 = *n; - i__1 = lopt, i__2 = *n * 6 + 1 + ((i__3 * i__3) << (1)); - lopt = max(i__1,i__2); - } - -/* If matrix was scaled, then rescale eigenvalues appropriately. */ - - if (iscale == 1) { - d__1 = 1. 
/ sigma; - dscal_(n, &d__1, &w[1], &c__1); - } - - work[1] = (doublereal) lopt; - iwork[1] = liopt; - - return 0; - -/* End of DSYEVD */ - -} /* dsyevd_ */ - -/* Subroutine */ int dsytd2_(char *uplo, integer *n, doublereal *a, integer * - lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - static doublereal taui; - extern /* Subroutine */ int dsyr2_(char *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *); - static doublereal alpha; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer * - ); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal - form T by an orthogonal similarity transformation: Q' * A * Q = T. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - symmetric matrix A is stored: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the symmetric matrix A. 
If UPLO = 'U', the leading - n-by-n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n-by-n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit, if UPLO = 'U', the diagonal and first superdiagonal - of A are overwritten by the corresponding elements of the - tridiagonal matrix T, and the elements above the first - superdiagonal, with the array TAU, represent the orthogonal - matrix Q as a product of elementary reflectors; if UPLO - = 'L', the diagonal and first subdiagonal of A are over- - written by the corresponding elements of the tridiagonal - matrix T, and the elements below the first subdiagonal, with - the array TAU, represent the orthogonal matrix Q as a product - of elementary reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - D (output) DOUBLE PRECISION array, dimension (N) - The diagonal elements of the tridiagonal matrix T: - D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix T: - E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - - TAU (output) DOUBLE PRECISION array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n-1) . . . H(2) H(1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in - A(1:i-1,i+1), and tau in TAU(i). 
- - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(n-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), - and tau in TAU(i). - - The contents of A on exit are illustrated by the following examples - with n = 5: - - if UPLO = 'U': if UPLO = 'L': - - ( d e v2 v3 v4 ) ( d ) - ( d e v3 v4 ) ( e d ) - ( d e v4 ) ( v1 e d ) - ( d e ) ( v1 v2 e d ) - ( d ) ( v1 v2 v3 e d ) - - where d and e denote diagonal and off-diagonal elements of T, and vi - denotes an element of the vector defining H(i). - - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tau; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DSYTD2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n <= 0) { - return 0; - } - - if (upper) { - -/* Reduce the upper triangle of A */ - - for (i__ = *n - 1; i__ >= 1; --i__) { - -/* - Generate elementary reflector H(i) = I - tau * v * v' - to annihilate A(1:i-1,i+1) -*/ - - dlarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 - + 1], &c__1, &taui); - e[i__] = a[i__ + (i__ + 1) * a_dim1]; - - if (taui != 0.) 
{ - -/* Apply H(i) from both sides to A(1:i,1:i) */ - - a[i__ + (i__ + 1) * a_dim1] = 1.; - -/* Compute x := tau * A * v storing x in TAU(1:i) */ - - dsymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * - a_dim1 + 1], &c__1, &c_b2879, &tau[1], &c__1); - -/* Compute w := x - 1/2 * tau * (x'*v) * v */ - - alpha = taui * -.5 * ddot_(&i__, &tau[1], &c__1, &a[(i__ + 1) - * a_dim1 + 1], &c__1); - daxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ - 1], &c__1); - -/* - Apply the transformation as a rank-2 update: - A := A - v * w' - w * v' -*/ - - dsyr2_(uplo, &i__, &c_b3001, &a[(i__ + 1) * a_dim1 + 1], & - c__1, &tau[1], &c__1, &a[a_offset], lda); - - a[i__ + (i__ + 1) * a_dim1] = e[i__]; - } - d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1]; - tau[i__] = taui; -/* L10: */ - } - d__[1] = a[a_dim1 + 1]; - } else { - -/* Reduce the lower triangle of A */ - - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* - Generate elementary reflector H(i) = I - tau * v * v' - to annihilate A(i+2:n,i) -*/ - - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * - a_dim1], &c__1, &taui); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - - if (taui != 0.) 
{ - -/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ - - a[i__ + 1 + i__ * a_dim1] = 1.; - -/* Compute x := tau * A * v storing y in TAU(i:n-1) */ - - i__2 = *n - i__; - dsymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b2879, & - tau[i__], &c__1); - -/* Compute w := x - 1/2 * tau * (x'*v) * v */ - - i__2 = *n - i__; - alpha = taui * -.5 * ddot_(&i__2, &tau[i__], &c__1, &a[i__ + - 1 + i__ * a_dim1], &c__1); - i__2 = *n - i__; - daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &c__1); - -/* - Apply the transformation as a rank-2 update: - A := A - v * w' - w * v' -*/ - - i__2 = *n - i__; - dsyr2_(uplo, &i__2, &c_b3001, &a[i__ + 1 + i__ * a_dim1], & - c__1, &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * - a_dim1], lda); - - a[i__ + 1 + i__ * a_dim1] = e[i__]; - } - d__[i__] = a[i__ + i__ * a_dim1]; - tau[i__] = taui; -/* L20: */ - } - d__[*n] = a[*n + *n * a_dim1]; - } - - return 0; - -/* End of DSYTD2 */ - -} /* dsytd2_ */ - -/* Subroutine */ int dsytrd_(char *uplo, integer *n, doublereal *a, integer * - lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * - work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__, j, nb, kk, nx, iws; - extern logical lsame_(char *, char *); - static integer nbmin, iinfo; - static logical upper; - extern /* Subroutine */ int dsytd2_(char *, integer *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, integer *), dsyr2k_(char *, char *, integer *, integer *, doublereal - *, doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dlatrd_(char *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, 
ftnlen); - static integer ldwork, lwkopt; - static logical lquery; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DSYTRD reduces a real symmetric matrix A to real symmetric - tridiagonal form T by an orthogonal similarity transformation: - Q**T * A * Q = T. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the symmetric matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - On exit, if UPLO = 'U', the diagonal and first superdiagonal - of A are overwritten by the corresponding elements of the - tridiagonal matrix T, and the elements above the first - superdiagonal, with the array TAU, represent the orthogonal - matrix Q as a product of elementary reflectors; if UPLO - = 'L', the diagonal and first subdiagonal of A are over- - written by the corresponding elements of the tridiagonal - matrix T, and the elements below the first subdiagonal, with - the array TAU, represent the orthogonal matrix Q as a product - of elementary reflectors. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - D (output) DOUBLE PRECISION array, dimension (N) - The diagonal elements of the tridiagonal matrix T: - D(i) = A(i,i). 
- - E (output) DOUBLE PRECISION array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix T: - E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - - TAU (output) DOUBLE PRECISION array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= 1. - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - If UPLO = 'U', the matrix Q is represented as a product of elementary - reflectors - - Q = H(n-1) . . . H(2) H(1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in - A(1:i-1,i+1), and tau in TAU(i). - - If UPLO = 'L', the matrix Q is represented as a product of elementary - reflectors - - Q = H(1) H(2) . . . H(n-1). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), - and tau in TAU(i). - - The contents of A on exit are illustrated by the following examples - with n = 5: - - if UPLO = 'U': if UPLO = 'L': - - ( d e v2 v3 v4 ) ( d ) - ( d e v3 v4 ) ( e d ) - ( d e v4 ) ( v1 e d ) - ( d e ) ( v1 v2 e d ) - ( d ) ( v1 v2 v3 e d ) - - where d and e denote diagonal and off-diagonal elements of T, and vi - denotes an element of the vector defining H(i). 
- - ===================================================================== - - - Test the input parameters -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --d__; - --e; - --tau; - --work; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - lquery = *lwork == -1; - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } else if (*lwork < 1 && ! lquery) { - *info = -9; - } - - if (*info == 0) { - -/* Determine the block size. */ - - nb = ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, - (ftnlen)1); - lwkopt = *n * nb; - work[1] = (doublereal) lwkopt; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("DSYTRD", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - work[1] = 1.; - return 0; - } - - nx = *n; - iws = 1; - if (nb > 1 && nb < *n) { - -/* - Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). - - Computing MAX -*/ - i__1 = nb, i__2 = ilaenv_(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < *n) { - -/* Determine if workspace is large enough for blocked code. */ - - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { - -/* - Not enough workspace to use optimal NB: determine the - minimum value of NB, and reduce NB or force use of - unblocked code by setting NX = N. - - Computing MAX -*/ - i__1 = *lwork / ldwork; - nb = max(i__1,1); - nbmin = ilaenv_(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); - if (nb < nbmin) { - nx = *n; - } - } - } else { - nx = *n; - } - } else { - nb = 1; - } - - if (upper) { - -/* - Reduce the upper triangle of A. - Columns 1:kk are handled by the unblocked method. 
-*/ - - kk = *n - (*n - nx + nb - 1) / nb * nb; - i__1 = kk + 1; - i__2 = -nb; - for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { - -/* - Reduce columns i:i+nb-1 to tridiagonal form and form the - matrix W which is needed to update the unreduced part of - the matrix -*/ - - i__3 = i__ + nb - 1; - dlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & - work[1], &ldwork); - -/* - Update the unreduced submatrix A(1:i-1,1:i-1), using an - update of the form: A := A - V*W' - W*V' -*/ - - i__3 = i__ - 1; - dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b3001, &a[i__ * - a_dim1 + 1], lda, &work[1], &ldwork, &c_b2865, &a[ - a_offset], lda); - -/* - Copy superdiagonal elements back into A, and diagonal - elements into D -*/ - - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j - 1 + j * a_dim1] = e[j - 1]; - d__[j] = a[j + j * a_dim1]; -/* L10: */ - } -/* L20: */ - } - -/* Use unblocked code to reduce the last or only block */ - - dsytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); - } else { - -/* Reduce the lower triangle of A */ - - i__2 = *n - nx; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { - -/* - Reduce columns i:i+nb-1 to tridiagonal form and form the - matrix W which is needed to update the unreduced part of - the matrix -*/ - - i__3 = *n - i__ + 1; - dlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & - tau[i__], &work[1], &ldwork); - -/* - Update the unreduced submatrix A(i+ib:n,i+ib:n), using - an update of the form: A := A - V*W' - W*V' -*/ - - i__3 = *n - i__ - nb + 1; - dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b3001, &a[i__ + nb + - i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b2865, &a[ - i__ + nb + (i__ + nb) * a_dim1], lda); - -/* - Copy subdiagonal elements back into A, and diagonal - elements into D -*/ - - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j + 1 + j * a_dim1] = e[j]; - d__[j] = a[j + j * a_dim1]; -/* L30: */ - } -/* L40: */ - } - -/* Use unblocked code to reduce the last or only block */ - - i__1 = *n - i__ + 1; - dsytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], - &tau[i__], &iinfo); - } - - work[1] = (doublereal) lwkopt; - return 0; - -/* End of DSYTRD */ - -} /* dsytrd_ */ - -/* Subroutine */ int dtrevc_(char *side, char *howmny, logical *select, - integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * - ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, - doublereal *work, integer *info) -{ - /* System generated locals */ - integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3; - doublereal d__1, d__2, d__3, d__4; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j, k; - static doublereal x[4] /* was [2][2] */; - static integer j1, j2, n2, ii, ki, ip, is; - static doublereal wi, wr, rec, ulp, beta, emax; - static logical pair; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - static logical allv; - static integer ierr; - static doublereal unfl, ovfl, smin; - static logical over; - 
static doublereal vmax; - static integer jnxt; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - static doublereal scale; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - static doublereal remax; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static logical leftv, bothv; - extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *); - static doublereal vcrit; - static logical somev; - static doublereal xnorm; - extern /* Subroutine */ int dlaln2_(logical *, integer *, integer *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, doublereal * - , doublereal *, integer *, doublereal *, doublereal *, integer *), - dlabad_(doublereal *, doublereal *); - - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal bignum; - static logical rightv; - static doublereal smlnum; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DTREVC computes some or all of the right and/or left eigenvectors of - a real upper quasi-triangular matrix T. - - The right eigenvector x and the left eigenvector y of T corresponding - to an eigenvalue w are defined by: - - T*x = w*x, y'*T = w*y' - - where y' denotes the conjugate transpose of the vector y. - - If all eigenvectors are requested, the routine may either return the - matrices X and/or Y of right or left eigenvectors of T, or the - products Q*X and/or Q*Y, where Q is an input orthogonal - matrix. 
If T was obtained from the real-Schur factorization of an - original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of - right or left eigenvectors of A. - - T must be in Schur canonical form (as returned by DHSEQR), that is, - block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each - 2-by-2 diagonal block has its diagonal elements equal and its - off-diagonal elements of opposite sign. Corresponding to each 2-by-2 - diagonal block is a complex conjugate pair of eigenvalues and - eigenvectors; only one eigenvector of the pair is computed, namely - the one corresponding to the eigenvalue with positive imaginary part. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'R': compute right eigenvectors only; - = 'L': compute left eigenvectors only; - = 'B': compute both right and left eigenvectors. - - HOWMNY (input) CHARACTER*1 - = 'A': compute all right and/or left eigenvectors; - = 'B': compute all right and/or left eigenvectors, - and backtransform them using the input matrices - supplied in VR and/or VL; - = 'S': compute selected right and/or left eigenvectors, - specified by the logical array SELECT. - - SELECT (input/output) LOGICAL array, dimension (N) - If HOWMNY = 'S', SELECT specifies the eigenvectors to be - computed. - If HOWMNY = 'A' or 'B', SELECT is not referenced. - To select the real eigenvector corresponding to a real - eigenvalue w(j), SELECT(j) must be set to .TRUE.. To select - the complex eigenvector corresponding to a complex conjugate - pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be - set to .TRUE.; then on exit SELECT(j) is .TRUE. and - SELECT(j+1) is .FALSE.. - - N (input) INTEGER - The order of the matrix T. N >= 0. - - T (input) DOUBLE PRECISION array, dimension (LDT,N) - The upper quasi-triangular matrix T in Schur canonical form. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= max(1,N). 
- - VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) - On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must - contain an N-by-N matrix Q (usually the orthogonal matrix Q - of Schur vectors returned by DHSEQR). - On exit, if SIDE = 'L' or 'B', VL contains: - if HOWMNY = 'A', the matrix Y of left eigenvectors of T; - VL has the same quasi-lower triangular form - as T'. If T(i,i) is a real eigenvalue, then - the i-th column VL(i) of VL is its - corresponding eigenvector. If T(i:i+1,i:i+1) - is a 2-by-2 block whose eigenvalues are - complex-conjugate eigenvalues of T, then - VL(i)+sqrt(-1)*VL(i+1) is the complex - eigenvector corresponding to the eigenvalue - with positive real part. - if HOWMNY = 'B', the matrix Q*Y; - if HOWMNY = 'S', the left eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VL, in the same order as their - eigenvalues. - A complex eigenvector corresponding to a complex eigenvalue - is stored in two consecutive columns, the first holding the - real part, and the second the imaginary part. - If SIDE = 'R', VL is not referenced. - - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= max(1,N) if - SIDE = 'L' or 'B'; LDVL >= 1 otherwise. - - VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) - On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must - contain an N-by-N matrix Q (usually the orthogonal matrix Q - of Schur vectors returned by DHSEQR). - On exit, if SIDE = 'R' or 'B', VR contains: - if HOWMNY = 'A', the matrix X of right eigenvectors of T; - VR has the same quasi-upper triangular form - as T. If T(i,i) is a real eigenvalue, then - the i-th column VR(i) of VR is its - corresponding eigenvector. If T(i:i+1,i:i+1) - is a 2-by-2 block whose eigenvalues are - complex-conjugate eigenvalues of T, then - VR(i)+sqrt(-1)*VR(i+1) is the complex - eigenvector corresponding to the eigenvalue - with positive real part. 
- if HOWMNY = 'B', the matrix Q*X; - if HOWMNY = 'S', the right eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VR, in the same order as their - eigenvalues. - A complex eigenvector corresponding to a complex eigenvalue - is stored in two consecutive columns, the first holding the - real part and the second the imaginary part. - If SIDE = 'L', VR is not referenced. - - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= max(1,N) if - SIDE = 'R' or 'B'; LDVR >= 1 otherwise. - - MM (input) INTEGER - The number of columns in the arrays VL and/or VR. MM >= M. - - M (output) INTEGER - The number of columns in the arrays VL and/or VR actually - used to store the eigenvectors. - If HOWMNY = 'A' or 'B', M is set to N. - Each selected real eigenvector occupies one column and each - selected complex eigenvector occupies two columns. - - WORK (workspace) DOUBLE PRECISION array, dimension (3*N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The algorithm used in this program is basically backward (forward) - substitution, with scaling to make the code robust against - possible overflow. - - Each eigenvector is normalized so that the element of largest - magnitude has magnitude 1; here the magnitude of a complex number - (x,y) is taken to be |x| + |y|. 
- - ===================================================================== - - - Decode and test the input parameters -*/ - - /* Parameter adjustments */ - --select; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - - /* Function Body */ - bothv = lsame_(side, "B"); - rightv = (lsame_(side, "R")) || (bothv); - leftv = (lsame_(side, "L")) || (bothv); - - allv = lsame_(howmny, "A"); - over = lsame_(howmny, "B"); - somev = lsame_(howmny, "S"); - - *info = 0; - if (! rightv && ! leftv) { - *info = -1; - } else if (! allv && ! over && ! somev) { - *info = -2; - } else if (*n < 0) { - *info = -4; - } else if (*ldt < max(1,*n)) { - *info = -6; - } else if ((*ldvl < 1) || (leftv && *ldvl < *n)) { - *info = -8; - } else if ((*ldvr < 1) || (rightv && *ldvr < *n)) { - *info = -10; - } else { - -/* - Set M to the number of columns required to store the selected - eigenvectors, standardize the array SELECT if necessary, and - test MM. -*/ - - if (somev) { - *m = 0; - pair = FALSE_; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (pair) { - pair = FALSE_; - select[j] = FALSE_; - } else { - if (j < *n) { - if (t[j + 1 + j * t_dim1] == 0.) { - if (select[j]) { - ++(*m); - } - } else { - pair = TRUE_; - if ((select[j]) || (select[j + 1])) { - select[j] = TRUE_; - *m += 2; - } - } - } else { - if (select[*n]) { - ++(*m); - } - } - } -/* L10: */ - } - } else { - *m = *n; - } - - if (*mm < *m) { - *info = -11; - } - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DTREVC", &i__1); - return 0; - } - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } - -/* Set the constants to control overflow. */ - - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (*n / ulp); - bignum = (1. 
- ulp) / smlnum; - -/* - Compute 1-norm of each column of strictly upper triangular - part of T to control overflow in triangular solver. -*/ - - work[1] = 0.; - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - work[j] = 0.; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - work[j] += (d__1 = t[i__ + j * t_dim1], abs(d__1)); -/* L20: */ - } -/* L30: */ - } - -/* - Index IP is used to specify the real or complex eigenvalue: - IP = 0, real eigenvalue, - 1, first of conjugate complex pair: (wr,wi) - -1, second of conjugate complex pair: (wr,wi) -*/ - - n2 = (*n) << (1); - - if (rightv) { - -/* Compute right eigenvectors. */ - - ip = 0; - is = *m; - for (ki = *n; ki >= 1; --ki) { - - if (ip == 1) { - goto L130; - } - if (ki == 1) { - goto L40; - } - if (t[ki + (ki - 1) * t_dim1] == 0.) { - goto L40; - } - ip = -1; - -L40: - if (somev) { - if (ip == 0) { - if (! select[ki]) { - goto L130; - } - } else { - if (! select[ki - 1]) { - goto L130; - } - } - } - -/* Compute the KI-th eigenvalue (WR,WI). */ - - wr = t[ki + ki * t_dim1]; - wi = 0.; - if (ip != 0) { - wi = sqrt((d__1 = t[ki + (ki - 1) * t_dim1], abs(d__1))) * - sqrt((d__2 = t[ki - 1 + ki * t_dim1], abs(d__2))); - } -/* Computing MAX */ - d__1 = ulp * (abs(wr) + abs(wi)); - smin = max(d__1,smlnum); - - if (ip == 0) { - -/* Real right eigenvector */ - - work[ki + *n] = 1.; - -/* Form right-hand side */ - - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - work[k + *n] = -t[k + ki * t_dim1]; -/* L50: */ - } - -/* - Solve the upper quasi-triangular system: - (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. -*/ - - jnxt = ki - 1; - for (j = ki - 1; j >= 1; --j) { - if (j > jnxt) { - goto L60; - } - j1 = j; - j2 = j; - jnxt = j - 1; - if (j > 1) { - if (t[j + (j - 1) * t_dim1] != 0.) 
{ - j1 = j - 1; - jnxt = j - 2; - } - } - - if (j1 == j2) { - -/* 1-by-1 diagonal block */ - - dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b2865, &t[j - + j * t_dim1], ldt, &c_b2865, &c_b2865, &work[ - j + *n], n, &wr, &c_b2879, x, &c__2, &scale, & - xnorm, &ierr); - -/* - Scale X(1,1) to avoid overflow when updating - the right-hand side. -*/ - - if (xnorm > 1.) { - if (work[j] > bignum / xnorm) { - x[0] /= xnorm; - scale /= xnorm; - } - } - -/* Scale if necessary */ - - if (scale != 1.) { - dscal_(&ki, &scale, &work[*n + 1], &c__1); - } - work[j + *n] = x[0]; - -/* Update right-hand side */ - - i__1 = j - 1; - d__1 = -x[0]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - - } else { - -/* 2-by-2 diagonal block */ - - dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b2865, &t[j - - 1 + (j - 1) * t_dim1], ldt, &c_b2865, & - c_b2865, &work[j - 1 + *n], n, &wr, &c_b2879, - x, &c__2, &scale, &xnorm, &ierr); - -/* - Scale X(1,1) and X(2,1) to avoid overflow when - updating the right-hand side. -*/ - - if (xnorm > 1.) { -/* Computing MAX */ - d__1 = work[j - 1], d__2 = work[j]; - beta = max(d__1,d__2); - if (beta > bignum / xnorm) { - x[0] /= xnorm; - x[1] /= xnorm; - scale /= xnorm; - } - } - -/* Scale if necessary */ - - if (scale != 1.) { - dscal_(&ki, &scale, &work[*n + 1], &c__1); - } - work[j - 1 + *n] = x[0]; - work[j + *n] = x[1]; - -/* Update right-hand side */ - - i__1 = j - 2; - d__1 = -x[0]; - daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, - &work[*n + 1], &c__1); - i__1 = j - 2; - d__1 = -x[1]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - } -L60: - ; - } - -/* Copy the vector x or Q*x to VR and normalize. */ - - if (! over) { - dcopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & - c__1); - - ii = idamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); - remax = 1. 
/ (d__1 = vr[ii + is * vr_dim1], abs(d__1)); - dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); - - i__1 = *n; - for (k = ki + 1; k <= i__1; ++k) { - vr[k + is * vr_dim1] = 0.; -/* L70: */ - } - } else { - if (ki > 1) { - i__1 = ki - 1; - dgemv_("N", n, &i__1, &c_b2865, &vr[vr_offset], ldvr, - &work[*n + 1], &c__1, &work[ki + *n], &vr[ki * - vr_dim1 + 1], &c__1); - } - - ii = idamax_(n, &vr[ki * vr_dim1 + 1], &c__1); - remax = 1. / (d__1 = vr[ii + ki * vr_dim1], abs(d__1)); - dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); - } - - } else { - -/* - Complex right eigenvector. - - Initial solve - [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. - [ (T(KI,KI-1) T(KI,KI) ) ] -*/ - - if ((d__1 = t[ki - 1 + ki * t_dim1], abs(d__1)) >= (d__2 = t[ - ki + (ki - 1) * t_dim1], abs(d__2))) { - work[ki - 1 + *n] = 1.; - work[ki + n2] = wi / t[ki - 1 + ki * t_dim1]; - } else { - work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1]; - work[ki + n2] = 1.; - } - work[ki + *n] = 0.; - work[ki - 1 + n2] = 0.; - -/* Form right-hand side */ - - i__1 = ki - 2; - for (k = 1; k <= i__1; ++k) { - work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) * - t_dim1]; - work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1]; -/* L80: */ - } - -/* - Solve upper quasi-triangular system: - (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) -*/ - - jnxt = ki - 2; - for (j = ki - 2; j >= 1; --j) { - if (j > jnxt) { - goto L90; - } - j1 = j; - j2 = j; - jnxt = j - 1; - if (j > 1) { - if (t[j + (j - 1) * t_dim1] != 0.) { - j1 = j - 1; - jnxt = j - 2; - } - } - - if (j1 == j2) { - -/* 1-by-1 diagonal block */ - - dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b2865, &t[j - + j * t_dim1], ldt, &c_b2865, &c_b2865, &work[ - j + *n], n, &wr, &wi, x, &c__2, &scale, & - xnorm, &ierr); - -/* - Scale X(1,1) and X(1,2) to avoid overflow when - updating the right-hand side. -*/ - - if (xnorm > 1.) 
{ - if (work[j] > bignum / xnorm) { - x[0] /= xnorm; - x[2] /= xnorm; - scale /= xnorm; - } - } - -/* Scale if necessary */ - - if (scale != 1.) { - dscal_(&ki, &scale, &work[*n + 1], &c__1); - dscal_(&ki, &scale, &work[n2 + 1], &c__1); - } - work[j + *n] = x[0]; - work[j + n2] = x[2]; - -/* Update the right-hand side */ - - i__1 = j - 1; - d__1 = -x[0]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - i__1 = j - 1; - d__1 = -x[2]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - n2 + 1], &c__1); - - } else { - -/* 2-by-2 diagonal block */ - - dlaln2_(&c_false, &c__2, &c__2, &smin, &c_b2865, &t[j - - 1 + (j - 1) * t_dim1], ldt, &c_b2865, & - c_b2865, &work[j - 1 + *n], n, &wr, &wi, x, & - c__2, &scale, &xnorm, &ierr); - -/* - Scale X to avoid overflow when updating - the right-hand side. -*/ - - if (xnorm > 1.) { -/* Computing MAX */ - d__1 = work[j - 1], d__2 = work[j]; - beta = max(d__1,d__2); - if (beta > bignum / xnorm) { - rec = 1. / xnorm; - x[0] *= rec; - x[2] *= rec; - x[1] *= rec; - x[3] *= rec; - scale *= rec; - } - } - -/* Scale if necessary */ - - if (scale != 1.) { - dscal_(&ki, &scale, &work[*n + 1], &c__1); - dscal_(&ki, &scale, &work[n2 + 1], &c__1); - } - work[j - 1 + *n] = x[0]; - work[j + *n] = x[1]; - work[j - 1 + n2] = x[2]; - work[j + n2] = x[3]; - -/* Update the right-hand side */ - - i__1 = j - 2; - d__1 = -x[0]; - daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, - &work[*n + 1], &c__1); - i__1 = j - 2; - d__1 = -x[1]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - i__1 = j - 2; - d__1 = -x[2]; - daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, - &work[n2 + 1], &c__1); - i__1 = j - 2; - d__1 = -x[3]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - n2 + 1], &c__1); - } -L90: - ; - } - -/* Copy the vector x or Q*x to VR and normalize. */ - - if (! 
over) { - dcopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 - + 1], &c__1); - dcopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & - c__1); - - emax = 0.; - i__1 = ki; - for (k = 1; k <= i__1; ++k) { -/* Computing MAX */ - d__3 = emax, d__4 = (d__1 = vr[k + (is - 1) * vr_dim1] - , abs(d__1)) + (d__2 = vr[k + is * vr_dim1], - abs(d__2)); - emax = max(d__3,d__4); -/* L100: */ - } - - remax = 1. / emax; - dscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); - dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); - - i__1 = *n; - for (k = ki + 1; k <= i__1; ++k) { - vr[k + (is - 1) * vr_dim1] = 0.; - vr[k + is * vr_dim1] = 0.; -/* L110: */ - } - - } else { - - if (ki > 2) { - i__1 = ki - 2; - dgemv_("N", n, &i__1, &c_b2865, &vr[vr_offset], ldvr, - &work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[ - (ki - 1) * vr_dim1 + 1], &c__1); - i__1 = ki - 2; - dgemv_("N", n, &i__1, &c_b2865, &vr[vr_offset], ldvr, - &work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * - vr_dim1 + 1], &c__1); - } else { - dscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 - + 1], &c__1); - dscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & - c__1); - } - - emax = 0.; - i__1 = *n; - for (k = 1; k <= i__1; ++k) { -/* Computing MAX */ - d__3 = emax, d__4 = (d__1 = vr[k + (ki - 1) * vr_dim1] - , abs(d__1)) + (d__2 = vr[k + ki * vr_dim1], - abs(d__2)); - emax = max(d__3,d__4); -/* L120: */ - } - remax = 1. / emax; - dscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); - dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); - } - } - - --is; - if (ip != 0) { - --is; - } -L130: - if (ip == 1) { - ip = 0; - } - if (ip == -1) { - ip = 1; - } -/* L140: */ - } - } - - if (leftv) { - -/* Compute left eigenvectors. */ - - ip = 0; - is = 1; - i__1 = *n; - for (ki = 1; ki <= i__1; ++ki) { - - if (ip == -1) { - goto L250; - } - if (ki == *n) { - goto L150; - } - if (t[ki + 1 + ki * t_dim1] == 0.) { - goto L150; - } - ip = 1; - -L150: - if (somev) { - if (! 
select[ki]) { - goto L250; - } - } - -/* Compute the KI-th eigenvalue (WR,WI). */ - - wr = t[ki + ki * t_dim1]; - wi = 0.; - if (ip != 0) { - wi = sqrt((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1))) * - sqrt((d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))); - } -/* Computing MAX */ - d__1 = ulp * (abs(wr) + abs(wi)); - smin = max(d__1,smlnum); - - if (ip == 0) { - -/* Real left eigenvector. */ - - work[ki + *n] = 1.; - -/* Form right-hand side */ - - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - work[k + *n] = -t[ki + k * t_dim1]; -/* L160: */ - } - -/* - Solve the quasi-triangular system: - (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK -*/ - - vmax = 1.; - vcrit = bignum; - - jnxt = ki + 1; - i__2 = *n; - for (j = ki + 1; j <= i__2; ++j) { - if (j < jnxt) { - goto L170; - } - j1 = j; - j2 = j; - jnxt = j + 1; - if (j < *n) { - if (t[j + 1 + j * t_dim1] != 0.) { - j2 = j + 1; - jnxt = j + 2; - } - } - - if (j1 == j2) { - -/* - 1-by-1 diagonal block - - Scale if necessary to avoid overflow when forming - the right-hand side. -*/ - - if (work[j] > vcrit) { - rec = 1. / vmax; - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + *n], &c__1); - vmax = 1.; - vcrit = bignum; - } - - i__3 = j - ki - 1; - work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], - &c__1, &work[ki + 1 + *n], &c__1); - -/* Solve (T(J,J)-WR)'*X = WORK */ - - dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b2865, &t[j - + j * t_dim1], ldt, &c_b2865, &c_b2865, &work[ - j + *n], n, &wr, &c_b2879, x, &c__2, &scale, & - xnorm, &ierr); - -/* Scale if necessary */ - - if (scale != 1.) { - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + *n], &c__1); - } - work[j + *n] = x[0]; -/* Computing MAX */ - d__2 = (d__1 = work[j + *n], abs(d__1)); - vmax = max(d__2,vmax); - vcrit = bignum / vmax; - - } else { - -/* - 2-by-2 diagonal block - - Scale if necessary to avoid overflow when forming - the right-hand side. 
- - Computing MAX -*/ - d__1 = work[j], d__2 = work[j + 1]; - beta = max(d__1,d__2); - if (beta > vcrit) { - rec = 1. / vmax; - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + *n], &c__1); - vmax = 1.; - vcrit = bignum; - } - - i__3 = j - ki - 1; - work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], - &c__1, &work[ki + 1 + *n], &c__1); - - i__3 = j - ki - 1; - work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 1 + (j + 1) * - t_dim1], &c__1, &work[ki + 1 + *n], &c__1); - -/* - Solve - [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) - [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) -*/ - - dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b2865, &t[j - + j * t_dim1], ldt, &c_b2865, &c_b2865, &work[ - j + *n], n, &wr, &c_b2879, x, &c__2, &scale, & - xnorm, &ierr); - -/* Scale if necessary */ - - if (scale != 1.) { - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + *n], &c__1); - } - work[j + *n] = x[0]; - work[j + 1 + *n] = x[1]; - -/* Computing MAX */ - d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 - = work[j + 1 + *n], abs(d__2)), d__3 = max( - d__3,d__4); - vmax = max(d__3,vmax); - vcrit = bignum / vmax; - - } -L170: - ; - } - -/* Copy the vector x or Q*x to VL and normalize. */ - - if (! over) { - i__2 = *n - ki + 1; - dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * - vl_dim1], &c__1); - - i__2 = *n - ki + 1; - ii = idamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - - 1; - remax = 1. / (d__1 = vl[ii + is * vl_dim1], abs(d__1)); - i__2 = *n - ki + 1; - dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - - i__2 = ki - 1; - for (k = 1; k <= i__2; ++k) { - vl[k + is * vl_dim1] = 0.; -/* L180: */ - } - - } else { - - if (ki < *n) { - i__2 = *n - ki; - dgemv_("N", n, &i__2, &c_b2865, &vl[(ki + 1) * - vl_dim1 + 1], ldvl, &work[ki + 1 + *n], &c__1, - &work[ki + *n], &vl[ki * vl_dim1 + 1], &c__1); - } - - ii = idamax_(n, &vl[ki * vl_dim1 + 1], &c__1); - remax = 1. 
/ (d__1 = vl[ii + ki * vl_dim1], abs(d__1)); - dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - - } - - } else { - -/* - Complex left eigenvector. - - Initial solve: - ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. - ((T(KI+1,KI) T(KI+1,KI+1)) ) -*/ - - if ((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1)) >= (d__2 = - t[ki + 1 + ki * t_dim1], abs(d__2))) { - work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1]; - work[ki + 1 + n2] = 1.; - } else { - work[ki + *n] = 1.; - work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1]; - } - work[ki + 1 + *n] = 0.; - work[ki + n2] = 0.; - -/* Form right-hand side */ - - i__2 = *n; - for (k = ki + 2; k <= i__2; ++k) { - work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1]; - work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1] - ; -/* L190: */ - } - -/* - Solve complex quasi-triangular system: - ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 -*/ - - vmax = 1.; - vcrit = bignum; - - jnxt = ki + 2; - i__2 = *n; - for (j = ki + 2; j <= i__2; ++j) { - if (j < jnxt) { - goto L200; - } - j1 = j; - j2 = j; - jnxt = j + 1; - if (j < *n) { - if (t[j + 1 + j * t_dim1] != 0.) { - j2 = j + 1; - jnxt = j + 2; - } - } - - if (j1 == j2) { - -/* - 1-by-1 diagonal block - - Scale if necessary to avoid overflow when - forming the right-hand side elements. -*/ - - if (work[j] > vcrit) { - rec = 1. 
/ vmax; - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + n2], &c__1); - vmax = 1.; - vcrit = bignum; - } - - i__3 = j - ki - 2; - work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + *n], &c__1); - i__3 = j - ki - 2; - work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + n2], &c__1); - -/* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */ - - d__1 = -wi; - dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b2865, &t[j - + j * t_dim1], ldt, &c_b2865, &c_b2865, &work[ - j + *n], n, &wr, &d__1, x, &c__2, &scale, & - xnorm, &ierr); - -/* Scale if necessary */ - - if (scale != 1.) { - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + n2], &c__1); - } - work[j + *n] = x[0]; - work[j + n2] = x[2]; -/* Computing MAX */ - d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 - = work[j + n2], abs(d__2)), d__3 = max(d__3, - d__4); - vmax = max(d__3,vmax); - vcrit = bignum / vmax; - - } else { - -/* - 2-by-2 diagonal block - - Scale if necessary to avoid overflow when forming - the right-hand side elements. - - Computing MAX -*/ - d__1 = work[j], d__2 = work[j + 1]; - beta = max(d__1,d__2); - if (beta > vcrit) { - rec = 1. 
/ vmax; - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + n2], &c__1); - vmax = 1.; - vcrit = bignum; - } - - i__3 = j - ki - 2; - work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + *n], &c__1); - - i__3 = j - ki - 2; - work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + n2], &c__1); - - i__3 = j - ki - 2; - work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * - t_dim1], &c__1, &work[ki + 2 + *n], &c__1); - - i__3 = j - ki - 2; - work[j + 1 + n2] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * - t_dim1], &c__1, &work[ki + 2 + n2], &c__1); - -/* - Solve 2-by-2 complex linear equation - ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B - ([T(j+1,j) T(j+1,j+1)] ) -*/ - - d__1 = -wi; - dlaln2_(&c_true, &c__2, &c__2, &smin, &c_b2865, &t[j - + j * t_dim1], ldt, &c_b2865, &c_b2865, &work[ - j + *n], n, &wr, &d__1, x, &c__2, &scale, & - xnorm, &ierr); - -/* Scale if necessary */ - - if (scale != 1.) { - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + n2], &c__1); - } - work[j + *n] = x[0]; - work[j + n2] = x[2]; - work[j + 1 + *n] = x[1]; - work[j + 1 + n2] = x[3]; -/* Computing MAX */ - d__1 = abs(x[0]), d__2 = abs(x[2]), d__1 = max(d__1, - d__2), d__2 = abs(x[1]), d__1 = max(d__1,d__2) - , d__2 = abs(x[3]), d__1 = max(d__1,d__2); - vmax = max(d__1,vmax); - vcrit = bignum / vmax; - - } -L200: - ; - } - -/* - Copy the vector x or Q*x to VL and normalize. - - L210: -*/ - if (! 
over) { - i__2 = *n - ki + 1; - dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * - vl_dim1], &c__1); - i__2 = *n - ki + 1; - dcopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * - vl_dim1], &c__1); - - emax = 0.; - i__2 = *n; - for (k = ki; k <= i__2; ++k) { -/* Computing MAX */ - d__3 = emax, d__4 = (d__1 = vl[k + is * vl_dim1], abs( - d__1)) + (d__2 = vl[k + (is + 1) * vl_dim1], - abs(d__2)); - emax = max(d__3,d__4); -/* L220: */ - } - remax = 1. / emax; - i__2 = *n - ki + 1; - dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - i__2 = *n - ki + 1; - dscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) - ; - - i__2 = ki - 1; - for (k = 1; k <= i__2; ++k) { - vl[k + is * vl_dim1] = 0.; - vl[k + (is + 1) * vl_dim1] = 0.; -/* L230: */ - } - } else { - if (ki < *n - 1) { - i__2 = *n - ki - 1; - dgemv_("N", n, &i__2, &c_b2865, &vl[(ki + 2) * - vl_dim1 + 1], ldvl, &work[ki + 2 + *n], &c__1, - &work[ki + *n], &vl[ki * vl_dim1 + 1], &c__1); - i__2 = *n - ki - 1; - dgemv_("N", n, &i__2, &c_b2865, &vl[(ki + 2) * - vl_dim1 + 1], ldvl, &work[ki + 2 + n2], &c__1, - &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + - 1], &c__1); - } else { - dscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & - c__1); - dscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 - + 1], &c__1); - } - - emax = 0.; - i__2 = *n; - for (k = 1; k <= i__2; ++k) { -/* Computing MAX */ - d__3 = emax, d__4 = (d__1 = vl[k + ki * vl_dim1], abs( - d__1)) + (d__2 = vl[k + (ki + 1) * vl_dim1], - abs(d__2)); - emax = max(d__3,d__4); -/* L240: */ - } - remax = 1. 
/ emax; - dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - dscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); - - } - - } - - ++is; - if (ip != 0) { - ++is; - } -L250: - if (ip == -1) { - ip = 0; - } - if (ip == 1) { - ip = -1; - } - -/* L260: */ - } - - } - - return 0; - -/* End of DTREVC */ - -} /* dtrevc_ */ - -/* Subroutine */ int dtrti2_(char *uplo, char *diag, integer *n, doublereal * - a, integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer j; - static doublereal ajj; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - static logical upper; - extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, - doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); - static logical nounit; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - DTRTI2 computes the inverse of a real upper or lower triangular - matrix. - - This is the Level 2 BLAS version of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the matrix A is upper or lower triangular. - = 'U': Upper triangular - = 'L': Lower triangular - - DIAG (input) CHARACTER*1 - Specifies whether or not the matrix A is unit triangular. - = 'N': Non-unit triangular - = 'U': Unit triangular - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading n by n upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. 
If UPLO = 'L', the - leading n by n lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - - On exit, the (triangular) inverse of the original matrix, in - the same storage format. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DTRTI2", &i__1); - return 0; - } - - if (upper) { - -/* Compute inverse of upper triangular matrix. */ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (nounit) { - a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; - ajj = -a[j + j * a_dim1]; - } else { - ajj = -1.; - } - -/* Compute elements 1:j-1 of j-th column. */ - - i__2 = j - 1; - dtrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & - a[j * a_dim1 + 1], &c__1); - i__2 = j - 1; - dscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); -/* L10: */ - } - } else { - -/* Compute inverse of lower triangular matrix. */ - - for (j = *n; j >= 1; --j) { - if (nounit) { - a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; - ajj = -a[j + j * a_dim1]; - } else { - ajj = -1.; - } - if (j < *n) { - -/* Compute elements j+1:n of j-th column. 
*/ - - i__1 = *n - j; - dtrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + - 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); - i__1 = *n - j; - dscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); - } -/* L20: */ - } - } - - return 0; - -/* End of DTRTI2 */ - -} /* dtrti2_ */ - -/* Subroutine */ int dtrtri_(char *uplo, char *diag, integer *n, doublereal * - a, integer *lda, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; - char ch__1[2]; - - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - - /* Local variables */ - static integer j, jb, nb, nn; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *), dtrsm_( - char *, char *, char *, char *, integer *, integer *, doublereal * - , doublereal *, integer *, doublereal *, integer *); - static logical upper; - extern /* Subroutine */ int dtrti2_(char *, char *, integer *, doublereal - *, integer *, integer *), xerbla_(char *, integer - *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical nounit; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - DTRTRI computes the inverse of a real upper or lower triangular - matrix A. - - This is the Level 3 BLAS version of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': A is upper triangular; - = 'L': A is lower triangular. - - DIAG (input) CHARACTER*1 - = 'N': A is non-unit triangular; - = 'U': A is unit triangular. - - N (input) INTEGER - The order of the matrix A. N >= 0. 
- - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - On exit, the (triangular) inverse of the original matrix, in - the same storage format. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, A(i,i) is exactly zero. The triangular - matrix is singular and its inverse can not be computed. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DTRTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Check for singularity if non-unit. */ - - if (nounit) { - i__1 = *n; - for (*info = 1; *info <= i__1; ++(*info)) { - if (a[*info + *info * a_dim1] == 0.) { - return 0; - } -/* L10: */ - } - *info = 0; - } - -/* - Determine the block size for this environment. 
- - Writing concatenation -*/ - i__2[0] = 1, a__1[0] = uplo; - i__2[1] = 1, a__1[1] = diag; - s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); - nb = ilaenv_(&c__1, "DTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)2); - if ((nb <= 1) || (nb >= *n)) { - -/* Use unblocked code */ - - dtrti2_(uplo, diag, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute inverse of upper triangular matrix */ - - i__1 = *n; - i__3 = nb; - for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { -/* Computing MIN */ - i__4 = nb, i__5 = *n - j + 1; - jb = min(i__4,i__5); - -/* Compute rows 1:j-1 of current block column */ - - i__4 = j - 1; - dtrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & - c_b2865, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); - i__4 = j - 1; - dtrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & - c_b3001, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], - lda); - -/* Compute inverse of current diagonal block */ - - dtrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L20: */ - } - } else { - -/* Compute inverse of lower triangular matrix */ - - nn = (*n - 1) / nb * nb + 1; - i__3 = -nb; - for (j = nn; i__3 < 0 ? 
j >= 1 : j <= 1; j += i__3) { -/* Computing MIN */ - i__1 = nb, i__4 = *n - j + 1; - jb = min(i__1,i__4); - if (j + jb <= *n) { - -/* Compute rows j+jb:n of current block column */ - - i__1 = *n - j - jb + 1; - dtrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, - &c_b2865, &a[j + jb + (j + jb) * a_dim1], lda, &a[ - j + jb + j * a_dim1], lda); - i__1 = *n - j - jb + 1; - dtrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, - &c_b3001, &a[j + j * a_dim1], lda, &a[j + jb + j - * a_dim1], lda); - } - -/* Compute inverse of current diagonal block */ - - dtrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L30: */ - } - } - } - - return 0; - -/* End of DTRTRI */ - -} /* dtrtri_ */ - -integer ieeeck_(integer *ispec, real *zero, real *one) -{ - /* System generated locals */ - integer ret_val; - - /* Local variables */ - static real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, - newzro; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1998 - - - Purpose - ======= - - IEEECK is called from the ILAENV to verify that Infinity and - possibly NaN arithmetic is safe (i.e. will not trap). - - Arguments - ========= - - ISPEC (input) INTEGER - Specifies whether to test just for inifinity arithmetic - or whether to test for infinity and NaN arithmetic. - = 0: Verify infinity arithmetic only. - = 1: Verify infinity and NaN arithmetic. - - ZERO (input) REAL - Must contain the value 0.0 - This is passed to prevent the compiler from optimizing - away this code. - - ONE (input) REAL - Must contain the value 1.0 - This is passed to prevent the compiler from optimizing - away this code. 
- - RETURN VALUE: INTEGER - = 0: Arithmetic failed to produce the correct answers - = 1: Arithmetic produced the correct answers -*/ - - ret_val = 1; - - posinf = *one / *zero; - if (posinf <= *one) { - ret_val = 0; - return ret_val; - } - - neginf = -(*one) / *zero; - if (neginf >= *zero) { - ret_val = 0; - return ret_val; - } - - negzro = *one / (neginf + *one); - if (negzro != *zero) { - ret_val = 0; - return ret_val; - } - - neginf = *one / negzro; - if (neginf >= *zero) { - ret_val = 0; - return ret_val; - } - - newzro = negzro + *zero; - if (newzro != *zero) { - ret_val = 0; - return ret_val; - } - - posinf = *one / newzro; - if (posinf <= *one) { - ret_val = 0; - return ret_val; - } - - neginf *= posinf; - if (neginf >= *zero) { - ret_val = 0; - return ret_val; - } - - posinf *= posinf; - if (posinf <= *one) { - ret_val = 0; - return ret_val; - } - - -/* Return if we were only asked to check infinity arithmetic */ - - if (*ispec == 0) { - return ret_val; - } - - nan1 = posinf + neginf; - - nan2 = posinf / neginf; - - nan3 = posinf / posinf; - - nan4 = posinf * *zero; - - nan5 = neginf * negzro; - - nan6 = nan5 * 0.f; - - if (nan1 == nan1) { - ret_val = 0; - return ret_val; - } - - if (nan2 == nan2) { - ret_val = 0; - return ret_val; - } - - if (nan3 == nan3) { - ret_val = 0; - return ret_val; - } - - if (nan4 == nan4) { - ret_val = 0; - return ret_val; - } - - if (nan5 == nan5) { - ret_val = 0; - return ret_val; - } - - if (nan6 == nan6) { - ret_val = 0; - return ret_val; - } - - return ret_val; -} /* ieeeck_ */ - -integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, - integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen - opts_len) -{ - /* System generated locals */ - integer ret_val; - - /* Builtin functions */ - /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); - integer s_cmp(char *, char *, ftnlen, ftnlen); - - /* Local variables */ - static integer i__; - static char c1[1], c2[2], c3[3], c4[2]; - static integer 
ic, nb, iz, nx; - static logical cname, sname; - static integer nbmin; - extern integer ieeeck_(integer *, real *, real *); - static char subnam[6]; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ILAENV is called from the LAPACK routines to choose problem-dependent - parameters for the local environment. See ISPEC for a description of - the parameters. - - This version provides a set of parameters which should give good, - but not optimal, performance on many of the currently available - computers. Users are encouraged to modify this subroutine to set - the tuning parameters for their particular machine using the option - and problem size information in the arguments. - - This routine will not function correctly if it is converted to all - lower case. Converting it to all upper case is allowed. - - Arguments - ========= - - ISPEC (input) INTEGER - Specifies the parameter to be returned as the value of - ILAENV. - = 1: the optimal blocksize; if this value is 1, an unblocked - algorithm will give the best performance. - = 2: the minimum block size for which the block routine - should be used; if the usable block size is less than - this value, an unblocked routine should be used. - = 3: the crossover point (in a block routine, for N less - than this value, an unblocked routine should be used) - = 4: the number of shifts, used in the nonsymmetric - eigenvalue routines - = 5: the minimum column dimension for blocking to be used; - rectangular blocks must have dimension at least k by m, - where k is given by ILAENV(2,...) and m by ILAENV(5,...) - = 6: the crossover point for the SVD (when reducing an m by n - matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds - this value, a QR factorization is used first to reduce - the matrix to a triangular form.) 
- = 7: the number of processors - = 8: the crossover point for the multishift QR and QZ methods - for nonsymmetric eigenvalue problems. - = 9: maximum size of the subproblems at the bottom of the - computation tree in the divide-and-conquer algorithm - (used by xGELSD and xGESDD) - =10: ieee NaN arithmetic can be trusted not to trap - =11: infinity arithmetic can be trusted not to trap - - NAME (input) CHARACTER*(*) - The name of the calling subroutine, in either upper case or - lower case. - - OPTS (input) CHARACTER*(*) - The character options to the subroutine NAME, concatenated - into a single character string. For example, UPLO = 'U', - TRANS = 'T', and DIAG = 'N' for a triangular routine would - be specified as OPTS = 'UTN'. - - N1 (input) INTEGER - N2 (input) INTEGER - N3 (input) INTEGER - N4 (input) INTEGER - Problem dimensions for the subroutine NAME; these may not all - be required. - - (ILAENV) (output) INTEGER - >= 0: the value of the parameter specified by ISPEC - < 0: if ILAENV = -k, the k-th argument had an illegal value. - - Further Details - =============== - - The following conventions have been used when calling ILAENV from the - LAPACK routines: - 1) OPTS is a concatenation of all of the character options to - subroutine NAME, in the same order that they appear in the - argument list for NAME, even if they are not used in determining - the value of the parameter specified by ISPEC. - 2) The problem dimensions N1, N2, N3, N4 are specified in the order - that they appear in the argument list for NAME. N1 is used - first, N2 second, and so on, and unused problem dimensions are - passed a value of -1. - 3) The parameter value returned by ILAENV is checked for validity in - the calling subroutine. 
For example, ILAENV is used to retrieve - the optimal blocksize for STRTRI as follows: - - NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) - IF( NB.LE.1 ) NB = MAX( 1, N ) - - ===================================================================== -*/ - - - switch (*ispec) { - case 1: goto L100; - case 2: goto L100; - case 3: goto L100; - case 4: goto L400; - case 5: goto L500; - case 6: goto L600; - case 7: goto L700; - case 8: goto L800; - case 9: goto L900; - case 10: goto L1000; - case 11: goto L1100; - } - -/* Invalid value for ISPEC */ - - ret_val = -1; - return ret_val; - -L100: - -/* Convert NAME to upper case if the first character is lower case. */ - - ret_val = 1; - s_copy(subnam, name__, (ftnlen)6, name_len); - ic = *(unsigned char *)subnam; - iz = 'Z'; - if ((iz == 90) || (iz == 122)) { - -/* ASCII character set */ - - if (ic >= 97 && ic <= 122) { - *(unsigned char *)subnam = (char) (ic - 32); - for (i__ = 2; i__ <= 6; ++i__) { - ic = *(unsigned char *)&subnam[i__ - 1]; - if (ic >= 97 && ic <= 122) { - *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); - } -/* L10: */ - } - } - - } else if ((iz == 233) || (iz == 169)) { - -/* EBCDIC character set */ - - if (((ic >= 129 && ic <= 137) || (ic >= 145 && ic <= 153)) || (ic >= - 162 && ic <= 169)) { - *(unsigned char *)subnam = (char) (ic + 64); - for (i__ = 2; i__ <= 6; ++i__) { - ic = *(unsigned char *)&subnam[i__ - 1]; - if (((ic >= 129 && ic <= 137) || (ic >= 145 && ic <= 153)) || - (ic >= 162 && ic <= 169)) { - *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); - } -/* L20: */ - } - } - - } else if ((iz == 218) || (iz == 250)) { - -/* Prime machines: ASCII+128 */ - - if (ic >= 225 && ic <= 250) { - *(unsigned char *)subnam = (char) (ic - 32); - for (i__ = 2; i__ <= 6; ++i__) { - ic = *(unsigned char *)&subnam[i__ - 1]; - if (ic >= 225 && ic <= 250) { - *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); - } -/* L30: */ - } - } - } - - *(unsigned char *)c1 = *(unsigned char *)subnam; - 
sname = (*(unsigned char *)c1 == 'S') || (*(unsigned char *)c1 == 'D'); - cname = (*(unsigned char *)c1 == 'C') || (*(unsigned char *)c1 == 'Z'); - if (! ((cname) || (sname))) { - return ret_val; - } - s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2); - s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3); - s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2); - - switch (*ispec) { - case 1: goto L110; - case 2: goto L200; - case 3: goto L300; - } - -L110: - -/* - ISPEC = 1: block size - - In these examples, separate code is provided for setting NB for - real and complex. We assume that NB will take the same value in - single or double precision. -*/ - - nb = 1; - - if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } else if ((((s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0) || (s_cmp( - c3, "RQF", (ftnlen)3, (ftnlen)3) == 0)) || (s_cmp(c3, "LQF", ( - ftnlen)3, (ftnlen)3) == 0)) || (s_cmp(c3, "QLF", (ftnlen)3, ( - ftnlen)3) == 0)) { - if (sname) { - nb = 32; - } else { - nb = 32; - } - } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 32; - } else { - nb = 32; - } - } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 32; - } else { - nb = 32; - } - } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (s_cmp(c2, "PO", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nb = 32; - } else if (sname && s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { - nb = 64; - } - } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { - if 
(s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - nb = 64; - } else if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nb = 32; - } else if (s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { - nb = 64; - } - } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp( - c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, - "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", ( - ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2, - (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen) - 2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0)) - { - nb = 32; - } - } else if (*(unsigned char *)c3 == 'M') { - if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp( - c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, - "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", ( - ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2, - (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen) - 2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0)) - { - nb = 32; - } - } - } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp( - c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, - "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", ( - ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2, - (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen) - 2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0)) - { - nb = 32; - } - } else if (*(unsigned char *)c3 == 'M') { - if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp( - c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, - "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", ( - ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2, - (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen) - 2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, 
(ftnlen)2) == 0)) - { - nb = 32; - } - } - } else if (s_cmp(c2, "GB", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - if (*n4 <= 64) { - nb = 1; - } else { - nb = 32; - } - } else { - if (*n4 <= 64) { - nb = 1; - } else { - nb = 32; - } - } - } - } else if (s_cmp(c2, "PB", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - if (*n2 <= 64) { - nb = 1; - } else { - nb = 32; - } - } else { - if (*n2 <= 64) { - nb = 1; - } else { - nb = 32; - } - } - } - } else if (s_cmp(c2, "TR", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (s_cmp(c2, "LA", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "UUM", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (sname && s_cmp(c2, "ST", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "EBZ", (ftnlen)3, (ftnlen)3) == 0) { - nb = 1; - } - } - ret_val = nb; - return ret_val; - -L200: - -/* ISPEC = 2: minimum block size */ - - nbmin = 2; - if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { - if ((((s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0) || (s_cmp(c3, - "RQF", (ftnlen)3, (ftnlen)3) == 0)) || (s_cmp(c3, "LQF", ( - ftnlen)3, (ftnlen)3) == 0)) || (s_cmp(c3, "QLF", (ftnlen)3, ( - ftnlen)3) == 0)) { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } - } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nbmin = 8; - } else { - nbmin = 8; - } - } else if (sname && s_cmp(c3, 
"TRD", (ftnlen)3, (ftnlen)3) == 0) { - nbmin = 2; - } - } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nbmin = 2; - } - } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp( - c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, - "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", ( - ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2, - (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen) - 2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0)) - { - nbmin = 2; - } - } else if (*(unsigned char *)c3 == 'M') { - if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp( - c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, - "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", ( - ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2, - (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen) - 2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0)) - { - nbmin = 2; - } - } - } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp( - c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, - "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", ( - ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2, - (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen) - 2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0)) - { - nbmin = 2; - } - } else if (*(unsigned char *)c3 == 'M') { - if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp( - c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, - "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", ( - ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2, - (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen) - 2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, 
(ftnlen)2) == 0)) - { - nbmin = 2; - } - } - } - ret_val = nbmin; - return ret_val; - -L300: - -/* ISPEC = 3: crossover point */ - - nx = 0; - if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { - if ((((s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0) || (s_cmp(c3, - "RQF", (ftnlen)3, (ftnlen)3) == 0)) || (s_cmp(c3, "LQF", ( - ftnlen)3, (ftnlen)3) == 0)) || (s_cmp(c3, "QLF", (ftnlen)3, ( - ftnlen)3) == 0)) { - if (sname) { - nx = 128; - } else { - nx = 128; - } - } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nx = 128; - } else { - nx = 128; - } - } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nx = 128; - } else { - nx = 128; - } - } - } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { - if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nx = 32; - } - } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nx = 32; - } - } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp( - c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, - "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", ( - ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2, - (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen) - 2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0)) - { - nx = 128; - } - } - } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp( - c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, - "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", ( - ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2, - (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen) - 2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0)) - { - nx = 128; - } - } - } - ret_val = nx; - return ret_val; - -L400: - 
-/* ISPEC = 4: number of shifts (used by xHSEQR) */ - - ret_val = 6; - return ret_val; - -L500: - -/* ISPEC = 5: minimum column dimension (not used) */ - - ret_val = 2; - return ret_val; - -L600: - -/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ - - ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); - return ret_val; - -L700: - -/* ISPEC = 7: number of processors (not used) */ - - ret_val = 1; - return ret_val; - -L800: - -/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ - - ret_val = 50; - return ret_val; - -L900: - -/* - ISPEC = 9: maximum size of the subproblems at the bottom of the - computation tree in the divide-and-conquer algorithm - (used by xGELSD and xGESDD) -*/ - - ret_val = 25; - return ret_val; - -L1000: - -/* - ISPEC = 10: ieee NaN arithmetic can be trusted not to trap - - ILAENV = 0 -*/ - ret_val = 1; - if (ret_val == 1) { - ret_val = ieeeck_(&c__0, &c_b320, &c_b1011); - } - return ret_val; - -L1100: - -/* - ISPEC = 11: infinity arithmetic can be trusted not to trap - - ILAENV = 0 -*/ - ret_val = 1; - if (ret_val == 1) { - ret_val = ieeeck_(&c__1, &c_b320, &c_b1011); - } - return ret_val; - -/* End of ILAENV */ - -} /* ilaenv_ */ - -/* Subroutine */ int sbdsdc_(char *uplo, char *compq, integer *n, real *d__, - real *e, real *u, integer *ldu, real *vt, integer *ldvt, real *q, - integer *iq, real *work, integer *iwork, integer *info) -{ - /* System generated locals */ - integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; - real r__1; - - /* Builtin functions */ - double r_sign(real *, real *), log(doublereal); - - /* Local variables */ - static integer i__, j, k; - static real p, r__; - static integer z__, ic, ii, kk; - static real cs; - static integer is, iu; - static real sn; - static integer nm1; - static real eps; - static integer ivt, difl, difr, ierr, perm, mlvl, sqre; - extern logical lsame_(char *, char *); - static integer poles; - extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, 
- integer *, real *, real *, real *, integer *); - static integer iuplo, nsize, start; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *), sswap_(integer *, real *, integer *, real *, integer * - ), slasd0_(integer *, integer *, real *, real *, real *, integer * - , real *, integer *, integer *, integer *, real *, integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int slasda_(integer *, integer *, integer *, - integer *, real *, real *, real *, integer *, real *, integer *, - real *, real *, real *, real *, integer *, integer *, integer *, - integer *, real *, real *, real *, real *, integer *, integer *), - xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *); - static integer givcol; - extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer - *, integer *, integer *, real *, real *, real *, integer *, real * - , integer *, real *, integer *, real *, integer *); - static integer icompq; - extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, - real *, real *, integer *), slartg_(real *, real *, real * - , real *, real *); - static real orgnrm; - static integer givnum; - extern doublereal slanst_(char *, integer *, real *, real *); - static integer givptr, qstart, smlsiz, wstart, smlszp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 - - - Purpose - ======= - - SBDSDC computes the singular value decomposition (SVD) of a real - N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, - using a divide and conquer method, where S is a diagonal matrix - with non-negative diagonal elements (the singular values of B), and - U and VT are orthogonal matrices of left and right singular vectors, - respectively. SBDSDC can be used to compute all singular values, - and optionally, singular vectors or singular vectors in compact form. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See SLASD3 for details. - - The code currently call SLASDQ if singular values only are desired. - However, it can be slightly modified to compute singular values - using the divide and conquer method. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': B is upper bidiagonal. - = 'L': B is lower bidiagonal. - - COMPQ (input) CHARACTER*1 - Specifies whether singular vectors are to be computed - as follows: - = 'N': Compute singular values only; - = 'P': Compute singular values and compute singular - vectors in compact form; - = 'I': Compute singular values and singular vectors. - - N (input) INTEGER - The order of the matrix B. N >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the n diagonal elements of the bidiagonal matrix B. - On exit, if INFO=0, the singular values of B. - - E (input/output) REAL array, dimension (N) - On entry, the elements of E contain the offdiagonal - elements of the bidiagonal matrix whose SVD is desired. - On exit, E has been destroyed. 
- - U (output) REAL array, dimension (LDU,N) - If COMPQ = 'I', then: - On exit, if INFO = 0, U contains the left singular vectors - of the bidiagonal matrix. - For other values of COMPQ, U is not referenced. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= 1. - If singular vectors are desired, then LDU >= max( 1, N ). - - VT (output) REAL array, dimension (LDVT,N) - If COMPQ = 'I', then: - On exit, if INFO = 0, VT' contains the right singular - vectors of the bidiagonal matrix. - For other values of COMPQ, VT is not referenced. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= 1. - If singular vectors are desired, then LDVT >= max( 1, N ). - - Q (output) REAL array, dimension (LDQ) - If COMPQ = 'P', then: - On exit, if INFO = 0, Q and IQ contain the left - and right singular vectors in a compact form, - requiring O(N log N) space instead of 2*N**2. - In particular, Q contains all the REAL data in - LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) - words of memory, where SMLSIZ is returned by ILAENV and - is equal to the maximum size of the subproblems at the - bottom of the computation tree (usually about 25). - For other values of COMPQ, Q is not referenced. - - IQ (output) INTEGER array, dimension (LDIQ) - If COMPQ = 'P', then: - On exit, if INFO = 0, Q and IQ contain the left - and right singular vectors in a compact form, - requiring O(N log N) space instead of 2*N**2. - In particular, IQ contains all INTEGER data in - LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) - words of memory, where SMLSIZ is returned by ILAENV and - is equal to the maximum size of the subproblems at the - bottom of the computation tree (usually about 25). - For other values of COMPQ, IQ is not referenced. - - WORK (workspace) REAL array, dimension (LWORK) - If COMPQ = 'N' then LWORK >= (4 * N). - If COMPQ = 'P' then LWORK >= (6 * N). - If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). 
- - IWORK (workspace) INTEGER array, dimension (8*N) - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value. - The update process of divide and conquer failed. - - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --q; - --iq; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - iuplo = 0; - if (lsame_(uplo, "U")) { - iuplo = 1; - } - if (lsame_(uplo, "L")) { - iuplo = 2; - } - if (lsame_(compq, "N")) { - icompq = 0; - } else if (lsame_(compq, "P")) { - icompq = 1; - } else if (lsame_(compq, "I")) { - icompq = 2; - } else { - icompq = -1; - } - if (iuplo == 0) { - *info = -1; - } else if (icompq < 0) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if ((*ldu < 1) || (icompq == 2 && *ldu < *n)) { - *info = -7; - } else if ((*ldvt < 1) || (icompq == 2 && *ldvt < *n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SBDSDC", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - smlsiz = ilaenv_(&c__9, "SBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - if (*n == 1) { - if (icompq == 1) { - q[1] = r_sign(&c_b1011, &d__[1]); - q[smlsiz * *n + 1] = 1.f; - } else if (icompq == 2) { - u[u_dim1 + 1] = r_sign(&c_b1011, &d__[1]); - vt[vt_dim1 + 1] = 1.f; - } - d__[1] = dabs(d__[1]); - return 0; - } - nm1 = *n - 1; - -/* - If matrix lower bidiagonal, rotate to be upper bidiagonal - by applying Givens rotations on the left -*/ - - wstart = 1; - qstart = 3; - if (icompq == 1) { - 
scopy_(n, &d__[1], &c__1, &q[1], &c__1); - i__1 = *n - 1; - scopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); - } - if (iuplo == 2) { - qstart = 5; - wstart = ((*n) << (1)) - 1; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (icompq == 1) { - q[i__ + ((*n) << (1))] = cs; - q[i__ + *n * 3] = sn; - } else if (icompq == 2) { - work[i__] = cs; - work[nm1 + i__] = -sn; - } -/* L10: */ - } - } - -/* If ICOMPQ = 0, use SLASDQ to compute the singular values. */ - - if (icompq == 0) { - slasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ - vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ - wstart], info); - goto L40; - } - -/* - If N is smaller than the minimum divide size SMLSIZ, then solve - the problem with another solver. -*/ - - if (*n <= smlsiz) { - if (icompq == 2) { - slaset_("A", n, n, &c_b320, &c_b1011, &u[u_offset], ldu); - slaset_("A", n, n, &c_b320, &c_b1011, &vt[vt_offset], ldvt); - slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] - , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ - wstart], info); - } else if (icompq == 1) { - iu = 1; - ivt = iu + *n; - slaset_("A", n, n, &c_b320, &c_b1011, &q[iu + (qstart - 1) * *n], - n); - slaset_("A", n, n, &c_b320, &c_b1011, &q[ivt + (qstart - 1) * *n], - n); - slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( - qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ - iu + (qstart - 1) * *n], n, &work[wstart], info); - } - goto L40; - } - - if (icompq == 2) { - slaset_("A", n, n, &c_b320, &c_b1011, &u[u_offset], ldu); - slaset_("A", n, n, &c_b320, &c_b1011, &vt[vt_offset], ldvt) - ; - } - -/* Scale. 
*/ - - orgnrm = slanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.f) { - return 0; - } - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, &c__1, &d__[1], n, &ierr); - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &nm1, &c__1, &e[1], &nm1, & - ierr); - - eps = slamch_("Epsilon"); - - mlvl = (integer) (log((real) (*n) / (real) (smlsiz + 1)) / log(2.f)) + 1; - smlszp = smlsiz + 1; - - if (icompq == 1) { - iu = 1; - ivt = smlsiz + 1; - difl = ivt + smlszp; - difr = difl + mlvl; - z__ = difr + ((mlvl) << (1)); - ic = z__ + mlvl; - is = ic + 1; - poles = is + 1; - givnum = poles + ((mlvl) << (1)); - - k = 1; - givptr = 2; - perm = 3; - givcol = perm + mlvl; - } - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((r__1 = d__[i__], dabs(r__1)) < eps) { - d__[i__] = r_sign(&eps, &d__[i__]); - } -/* L20: */ - } - - start = 1; - sqre = 0; - - i__1 = nm1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (((r__1 = e[i__], dabs(r__1)) < eps) || (i__ == nm1)) { - -/* - Subproblem found. First determine its size and then - apply divide and conquer on it. -*/ - - if (i__ < nm1) { - -/* A subproblem with E(I) small for I < NM1. */ - - nsize = i__ - start + 1; - } else if ((r__1 = e[i__], dabs(r__1)) >= eps) { - -/* A subproblem with E(NM1) not too small but I = NM1. */ - - nsize = *n - start + 1; - } else { - -/* - A subproblem with E(NM1) small. This implies an - 1-by-1 subproblem at D(N). Solve this 1-by-1 problem - first. 
-*/ - - nsize = i__ - start + 1; - if (icompq == 2) { - u[*n + *n * u_dim1] = r_sign(&c_b1011, &d__[*n]); - vt[*n + *n * vt_dim1] = 1.f; - } else if (icompq == 1) { - q[*n + (qstart - 1) * *n] = r_sign(&c_b1011, &d__[*n]); - q[*n + (smlsiz + qstart - 1) * *n] = 1.f; - } - d__[*n] = (r__1 = d__[*n], dabs(r__1)); - } - if (icompq == 2) { - slasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + - start * u_dim1], ldu, &vt[start + start * vt_dim1], - ldvt, &smlsiz, &iwork[1], &work[wstart], info); - } else { - slasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ - start], &q[start + (iu + qstart - 2) * *n], n, &q[ - start + (ivt + qstart - 2) * *n], &iq[start + k * *n], - &q[start + (difl + qstart - 2) * *n], &q[start + ( - difr + qstart - 2) * *n], &q[start + (z__ + qstart - - 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ - start + givptr * *n], &iq[start + givcol * *n], n, & - iq[start + perm * *n], &q[start + (givnum + qstart - - 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ - start + (is + qstart - 2) * *n], &work[wstart], & - iwork[1], info); - if (*info != 0) { - return 0; - } - } - start = i__ + 1; - } -/* L30: */ - } - -/* Unscale */ - - slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, n, &c__1, &d__[1], n, &ierr); -L40: - -/* Use Selection Sort to minimize swaps of singular vectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - kk = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] > p) { - kk = j; - p = d__[j]; - } -/* L50: */ - } - if (kk != i__) { - d__[kk] = d__[i__]; - d__[i__] = p; - if (icompq == 1) { - iq[i__] = kk; - } else if (icompq == 2) { - sswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & - c__1); - sswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); - } - } else if (icompq == 1) { - iq[i__] = i__; - } -/* L60: */ - } - -/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ - - if (icompq == 1) { - if (iuplo == 1) { - iq[*n] = 1; - } else { - iq[*n] 
= 0; - } - } - -/* - If B is lower bidiagonal, update U by those Givens rotations - which rotated B to be upper bidiagonal -*/ - - if (iuplo == 2 && icompq == 2) { - slasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); - } - - return 0; - -/* End of SBDSDC */ - -} /* sbdsdc_ */ - -/* Subroutine */ int sbdsqr_(char *uplo, integer *n, integer *ncvt, integer * - nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real * - u, integer *ldu, real *c__, integer *ldc, real *work, integer *info) -{ - /* System generated locals */ - integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2; - real r__1, r__2, r__3, r__4; - doublereal d__1; - - /* Builtin functions */ - double pow_dd(doublereal *, doublereal *), sqrt(doublereal), r_sign(real * - , real *); - - /* Local variables */ - static real f, g, h__; - static integer i__, j, m; - static real r__, cs; - static integer ll; - static real sn, mu; - static integer nm1, nm12, nm13, lll; - static real eps, sll, tol, abse; - static integer idir; - static real abss; - static integer oldm; - static real cosl; - static integer isub, iter; - static real unfl, sinl, cosr, smin, smax, sinr; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *), slas2_(real *, real *, real *, real *, - real *); - extern logical lsame_(char *, char *); - static real oldcs; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static integer oldll; - static real shift, sigmn, oldsn; - static integer maxit; - static real sminl; - extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, - integer *, real *, real *, real *, integer *); - static real sigmx; - static logical lower; - extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, - integer *), slasq1_(integer *, real *, real *, real *, integer *), - slasv2_(real *, real *, real *, real *, real *, real *, real *, - real *, real *); - extern doublereal 
slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static real sminoa; - extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * - ); - static real thresh; - static logical rotate; - static real sminlo, tolmul; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 - - - Purpose - ======= - - SBDSQR computes the singular value decomposition (SVD) of a real - N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' - denotes the transpose of P), where S is a diagonal matrix with - non-negative diagonal elements (the singular values of B), and Q - and P are orthogonal matrices. - - The routine computes S, and optionally computes U * Q, P' * VT, - or Q' * C, for given real input matrices U, VT, and C. - - See "Computing Small Singular Values of Bidiagonal Matrices With - Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, - LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, - no. 5, pp. 873-912, Sept 1990) and - "Accurate singular values and differential qd algorithms," by - B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics - Department, University of California at Berkeley, July 1992 - for a detailed description of the algorithm. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': B is upper bidiagonal; - = 'L': B is lower bidiagonal. - - N (input) INTEGER - The order of the matrix B. N >= 0. - - NCVT (input) INTEGER - The number of columns of the matrix VT. NCVT >= 0. - - NRU (input) INTEGER - The number of rows of the matrix U. NRU >= 0. - - NCC (input) INTEGER - The number of columns of the matrix C. NCC >= 0. - - D (input/output) REAL array, dimension (N) - On entry, the n diagonal elements of the bidiagonal matrix B. - On exit, if INFO=0, the singular values of B in decreasing - order. 
- - E (input/output) REAL array, dimension (N) - On entry, the elements of E contain the - offdiagonal elements of the bidiagonal matrix whose SVD - is desired. On normal exit (INFO = 0), E is destroyed. - If the algorithm does not converge (INFO > 0), D and E - will contain the diagonal and superdiagonal elements of a - bidiagonal matrix orthogonally equivalent to the one given - as input. E(N) is used for workspace. - - VT (input/output) REAL array, dimension (LDVT, NCVT) - On entry, an N-by-NCVT matrix VT. - On exit, VT is overwritten by P' * VT. - VT is not referenced if NCVT = 0. - - LDVT (input) INTEGER - The leading dimension of the array VT. - LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. - - U (input/output) REAL array, dimension (LDU, N) - On entry, an NRU-by-N matrix U. - On exit, U is overwritten by U * Q. - U is not referenced if NRU = 0. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= max(1,NRU). - - C (input/output) REAL array, dimension (LDC, NCC) - On entry, an N-by-NCC matrix C. - On exit, C is overwritten by Q' * C. - C is not referenced if NCC = 0. - - LDC (input) INTEGER - The leading dimension of the array C. - LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. - - WORK (workspace) REAL array, dimension (4*N) - - INFO (output) INTEGER - = 0: successful exit - < 0: If INFO = -i, the i-th argument had an illegal value - > 0: the algorithm did not converge; D and E contain the - elements of a bidiagonal matrix which is orthogonally - similar to the input matrix B; if INFO = i, i - elements of E have not converged to zero. - - Internal Parameters - =================== - - TOLMUL REAL, default = max(10,min(100,EPS**(-1/8))) - TOLMUL controls the convergence criterion of the QR loop. - If it is positive, TOLMUL*EPS is the desired relative - precision in the computed singular values. 
- If it is negative, abs(TOLMUL*EPS*sigma_max) is the - desired absolute accuracy in the computed singular - values (corresponds to relative accuracy - abs(TOLMUL*EPS) in the largest singular value. - abs(TOLMUL) should be between 1 and 1/EPS, and preferably - between 10 (for fast convergence) and .1/EPS - (for there to be some accuracy in the results). - Default is to lose at either one eighth or 2 of the - available decimal digits in each computed singular value - (whichever is smaller). - - MAXITR INTEGER, default = 6 - MAXITR controls the maximum number of passes of the - algorithm through its inner loop. The algorithms stops - (and so fails to converge) if the number of passes - through the inner loop exceeds MAXITR*N**2. - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - --d__; - --e; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - *info = 0; - lower = lsame_(uplo, "L"); - if (! lsame_(uplo, "U") && ! lower) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ncvt < 0) { - *info = -3; - } else if (*nru < 0) { - *info = -4; - } else if (*ncc < 0) { - *info = -5; - } else if ((*ncvt == 0 && *ldvt < 1) || (*ncvt > 0 && *ldvt < max(1,*n))) - { - *info = -9; - } else if (*ldu < max(1,*nru)) { - *info = -11; - } else if ((*ncc == 0 && *ldc < 1) || (*ncc > 0 && *ldc < max(1,*n))) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SBDSQR", &i__1); - return 0; - } - if (*n == 0) { - return 0; - } - if (*n == 1) { - goto L160; - } - -/* ROTATE is true if any singular vectors desired, false otherwise */ - - rotate = ((*ncvt > 0) || (*nru > 0)) || (*ncc > 0); - -/* If no singular vectors desired, use qd algorithm */ - - if (! 
rotate) { - slasq1_(n, &d__[1], &e[1], &work[1], info); - return 0; - } - - nm1 = *n - 1; - nm12 = nm1 + nm1; - nm13 = nm12 + nm1; - idir = 0; - -/* Get machine constants */ - - eps = slamch_("Epsilon"); - unfl = slamch_("Safe minimum"); - -/* - If matrix lower bidiagonal, rotate to be upper bidiagonal - by applying Givens rotations on the left -*/ - - if (lower) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - work[i__] = cs; - work[nm1 + i__] = sn; -/* L10: */ - } - -/* Update singular vectors if desired */ - - if (*nru > 0) { - slasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], - ldu); - } - if (*ncc > 0) { - slasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], - ldc); - } - } - -/* - Compute singular values to relative accuracy TOL - (By setting TOL to be negative, algorithm will compute - singular values to absolute accuracy ABS(TOL)*norm(input matrix)) - - Computing MAX - Computing MIN -*/ - d__1 = (doublereal) eps; - r__3 = 100.f, r__4 = pow_dd(&d__1, &c_b2944); - r__1 = 10.f, r__2 = dmin(r__3,r__4); - tolmul = dmax(r__1,r__2); - tol = tolmul * eps; - -/* Compute approximate maximum, minimum singular values */ - - smax = 0.f; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__2 = smax, r__3 = (r__1 = d__[i__], dabs(r__1)); - smax = dmax(r__2,r__3); -/* L20: */ - } - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__2 = smax, r__3 = (r__1 = e[i__], dabs(r__1)); - smax = dmax(r__2,r__3); -/* L30: */ - } - sminl = 0.f; - if (tol >= 0.f) { - -/* Relative accuracy desired */ - - sminoa = dabs(d__[1]); - if (sminoa == 0.f) { - goto L50; - } - mu = sminoa; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - mu = (r__2 = d__[i__], dabs(r__2)) * (mu / (mu + (r__1 = e[i__ - - 1], dabs(r__1)))); - sminoa = dmin(sminoa,mu); - if (sminoa == 0.f) { - 
goto L50; - } -/* L40: */ - } -L50: - sminoa /= sqrt((real) (*n)); -/* Computing MAX */ - r__1 = tol * sminoa, r__2 = *n * 6 * *n * unfl; - thresh = dmax(r__1,r__2); - } else { - -/* - Absolute accuracy desired - - Computing MAX -*/ - r__1 = dabs(tol) * smax, r__2 = *n * 6 * *n * unfl; - thresh = dmax(r__1,r__2); - } - -/* - Prepare for main iteration loop for the singular values - (MAXIT is the maximum number of passes through the inner - loop permitted before nonconvergence signalled.) -*/ - - maxit = *n * 6 * *n; - iter = 0; - oldll = -1; - oldm = -1; - -/* M points to last element of unconverged part of matrix */ - - m = *n; - -/* Begin main iteration loop */ - -L60: - -/* Check for convergence or exceeding iteration count */ - - if (m <= 1) { - goto L160; - } - if (iter > maxit) { - goto L200; - } - -/* Find diagonal block of matrix to work on */ - - if (tol < 0.f && (r__1 = d__[m], dabs(r__1)) <= thresh) { - d__[m] = 0.f; - } - smax = (r__1 = d__[m], dabs(r__1)); - smin = smax; - i__1 = m - 1; - for (lll = 1; lll <= i__1; ++lll) { - ll = m - lll; - abss = (r__1 = d__[ll], dabs(r__1)); - abse = (r__1 = e[ll], dabs(r__1)); - if (tol < 0.f && abss <= thresh) { - d__[ll] = 0.f; - } - if (abse <= thresh) { - goto L80; - } - smin = dmin(smin,abss); -/* Computing MAX */ - r__1 = max(smax,abss); - smax = dmax(r__1,abse); -/* L70: */ - } - ll = 0; - goto L90; -L80: - e[ll] = 0.f; - -/* Matrix splits since E(LL) = 0 */ - - if (ll == m - 1) { - -/* Convergence of bottom singular value, return to top of loop */ - - --m; - goto L60; - } -L90: - ++ll; - -/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ - - if (ll == m - 1) { - -/* 2 by 2 block, handle separately */ - - slasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, - &sinl, &cosl); - d__[m - 1] = sigmx; - e[m - 1] = 0.f; - d__[m] = sigmn; - -/* Compute singular vectors, if desired */ - - if (*ncvt > 0) { - srot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & - cosr, &sinr); - } - 
if (*nru > 0) { - srot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & - c__1, &cosl, &sinl); - } - if (*ncc > 0) { - srot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & - cosl, &sinl); - } - m += -2; - goto L60; - } - -/* - If working on new submatrix, choose shift direction - (from larger end diagonal element towards smaller) -*/ - - if ((ll > oldm) || (m < oldll)) { - if ((r__1 = d__[ll], dabs(r__1)) >= (r__2 = d__[m], dabs(r__2))) { - -/* Chase bulge from top (big end) to bottom (small end) */ - - idir = 1; - } else { - -/* Chase bulge from bottom (big end) to top (small end) */ - - idir = 2; - } - } - -/* Apply convergence tests */ - - if (idir == 1) { - -/* - Run convergence test in forward direction - First apply standard test to bottom of matrix -*/ - - if (((r__2 = e[m - 1], dabs(r__2)) <= dabs(tol) * (r__1 = d__[m], - dabs(r__1))) || (tol < 0.f && (r__3 = e[m - 1], dabs(r__3)) <= - thresh)) { - e[m - 1] = 0.f; - goto L60; - } - - if (tol >= 0.f) { - -/* - If relative accuracy desired, - apply convergence criterion forward -*/ - - mu = (r__1 = d__[ll], dabs(r__1)); - sminl = mu; - i__1 = m - 1; - for (lll = ll; lll <= i__1; ++lll) { - if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) { - e[lll] = 0.f; - goto L60; - } - sminlo = sminl; - mu = (r__2 = d__[lll + 1], dabs(r__2)) * (mu / (mu + (r__1 = - e[lll], dabs(r__1)))); - sminl = dmin(sminl,mu); -/* L100: */ - } - } - - } else { - -/* - Run convergence test in backward direction - First apply standard test to top of matrix -*/ - - if (((r__2 = e[ll], dabs(r__2)) <= dabs(tol) * (r__1 = d__[ll], dabs( - r__1))) || (tol < 0.f && (r__3 = e[ll], dabs(r__3)) <= thresh) - ) { - e[ll] = 0.f; - goto L60; - } - - if (tol >= 0.f) { - -/* - If relative accuracy desired, - apply convergence criterion backward -*/ - - mu = (r__1 = d__[m], dabs(r__1)); - sminl = mu; - i__1 = ll; - for (lll = m - 1; lll >= i__1; --lll) { - if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) { - e[lll] = 0.f; - goto L60; - } - 
sminlo = sminl; - mu = (r__2 = d__[lll], dabs(r__2)) * (mu / (mu + (r__1 = e[ - lll], dabs(r__1)))); - sminl = dmin(sminl,mu); -/* L110: */ - } - } - } - oldll = ll; - oldm = m; - -/* - Compute shift. First, test if shifting would ruin relative - accuracy, and if so set the shift to zero. - - Computing MAX -*/ - r__1 = eps, r__2 = tol * .01f; - if (tol >= 0.f && *n * tol * (sminl / smax) <= dmax(r__1,r__2)) { - -/* Use a zero shift to avoid loss of relative accuracy */ - - shift = 0.f; - } else { - -/* Compute the shift from 2-by-2 block at end of matrix */ - - if (idir == 1) { - sll = (r__1 = d__[ll], dabs(r__1)); - slas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); - } else { - sll = (r__1 = d__[m], dabs(r__1)); - slas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); - } - -/* Test if shift negligible, and if so set to zero */ - - if (sll > 0.f) { -/* Computing 2nd power */ - r__1 = shift / sll; - if (r__1 * r__1 < eps) { - shift = 0.f; - } - } - } - -/* Increment iteration count */ - - iter = iter + m - ll; - -/* If SHIFT = 0, do simplified QR iteration */ - - if (shift == 0.f) { - if (idir == 1) { + if (shift == 0.) 
{ + if (idir == 1) { /* Chase bulge from top to bottom Save cosines and sines for later singular vector updates */ - cs = 1.f; - oldcs = 1.f; + cs = 1.; + oldcs = 1.; i__1 = m - 1; for (i__ = ll; i__ <= i__1; ++i__) { - r__1 = d__[i__] * cs; - slartg_(&r__1, &e[i__], &cs, &sn, &r__); + d__1 = d__[i__] * cs; + dlartg_(&d__1, &e[i__], &cs, &sn, &r__); if (i__ > ll) { e[i__ - 1] = oldsn * r__; } - r__1 = oldcs * r__; - r__2 = d__[i__ + 1] * sn; - slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); + d__1 = oldcs * r__; + d__2 = d__[i__ + 1] * sn; + dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); work[i__ - ll + 1] = cs; work[i__ - ll + 1 + nm1] = sn; work[i__ - ll + 1 + nm12] = oldcs; @@ -64956,24 +1148,24 @@ L90: if (*ncvt > 0) { i__1 = m - ll + 1; - slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ ll + vt_dim1], ldvt); } if (*nru > 0) { i__1 = m - ll + 1; - slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + 1], &u[ll * u_dim1 + 1], ldu); } if (*ncc > 0) { i__1 = m - ll + 1; - slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1], &c__[ll + c_dim1], ldc); } /* Test convergence */ - if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { - e[m - 1] = 0.f; + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; } } else { @@ -64983,18 +1175,18 @@ L90: Save cosines and sines for later singular vector updates */ - cs = 1.f; - oldcs = 1.f; + cs = 1.; + oldcs = 1.; i__1 = ll + 1; for (i__ = m; i__ >= i__1; --i__) { - r__1 = d__[i__] * cs; - slartg_(&r__1, &e[i__ - 1], &cs, &sn, &r__); + d__1 = d__[i__] * cs; + dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__); if (i__ < m) { e[i__] = oldsn * r__; } - r__1 = oldcs * r__; - r__2 = d__[i__ - 1] * sn; - slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); + d__1 = oldcs * r__; + d__2 = d__[i__ - 1] * sn; + 
dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); work[i__ - ll] = cs; work[i__ - ll + nm1] = -sn; work[i__ - ll + nm12] = oldcs; @@ -65009,24 +1201,24 @@ L90: if (*ncvt > 0) { i__1 = m - ll + 1; - slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ nm13 + 1], &vt[ll + vt_dim1], ldvt); } if (*nru > 0) { i__1 = m - ll + 1; - slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu); } if (*ncc > 0) { i__1 = m - ll + 1; - slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ ll + c_dim1], ldc); } /* Test convergence */ - if ((r__1 = e[ll], dabs(r__1)) <= thresh) { - e[ll] = 0.f; + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; } } } else { @@ -65040,12 +1232,12 @@ L90: Save cosines and sines for later singular vector updates */ - f = ((r__1 = d__[ll], dabs(r__1)) - shift) * (r_sign(&c_b1011, & - d__[ll]) + shift / d__[ll]); + f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b15, &d__[ + ll]) + shift / d__[ll]); g = e[ll]; i__1 = m - 1; for (i__ = ll; i__ <= i__1; ++i__) { - slartg_(&f, &g, &cosr, &sinr, &r__); + dlartg_(&f, &g, &cosr, &sinr, &r__); if (i__ > ll) { e[i__ - 1] = r__; } @@ -65053,7 +1245,7 @@ L90: e[i__] = cosr * e[i__] - sinr * d__[i__]; g = sinr * d__[i__ + 1]; d__[i__ + 1] = cosr * d__[i__ + 1]; - slartg_(&f, &g, &cosl, &sinl, &r__); + dlartg_(&f, &g, &cosl, &sinl, &r__); d__[i__] = r__; f = cosl * e[i__] + sinl * d__[i__ + 1]; d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; @@ -65073,24 +1265,24 @@ L90: if (*ncvt > 0) { i__1 = m - ll + 1; - slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ ll + vt_dim1], ldvt); } if (*nru > 0) { i__1 = m - ll + 1; - slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + dlasr_("R", "V", "F", 
nru, &i__1, &work[nm12 + 1], &work[nm13 + 1], &u[ll * u_dim1 + 1], ldu); } if (*ncc > 0) { i__1 = m - ll + 1; - slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + 1], &c__[ll + c_dim1], ldc); } /* Test convergence */ - if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { - e[m - 1] = 0.f; + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; } } else { @@ -65100,12 +1292,12 @@ L90: Save cosines and sines for later singular vector updates */ - f = ((r__1 = d__[m], dabs(r__1)) - shift) * (r_sign(&c_b1011, & - d__[m]) + shift / d__[m]); + f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b15, &d__[m] + ) + shift / d__[m]); g = e[m - 1]; i__1 = ll + 1; for (i__ = m; i__ >= i__1; --i__) { - slartg_(&f, &g, &cosr, &sinr, &r__); + dlartg_(&f, &g, &cosr, &sinr, &r__); if (i__ < m) { e[i__] = r__; } @@ -65113,7 +1305,7 @@ L90: e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; g = sinr * d__[i__ - 1]; d__[i__ - 1] = cosr * d__[i__ - 1]; - slartg_(&f, &g, &cosl, &sinl, &r__); + dlartg_(&f, &g, &cosl, &sinl, &r__); d__[i__] = r__; f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; @@ -65131,25 +1323,25 @@ L90: /* Test convergence */ - if ((r__1 = e[ll], dabs(r__1)) <= thresh) { - e[ll] = 0.f; + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; } /* Update singular vectors if desired */ if (*ncvt > 0) { i__1 = m - ll + 1; - slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ nm13 + 1], &vt[ll + vt_dim1], ldvt); } if (*nru > 0) { i__1 = m - ll + 1; - slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * u_dim1 + 1], ldu); } if (*ncc > 0) { i__1 = m - ll + 1; - slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ ll + c_dim1], 
ldc); } } @@ -65164,13 +1356,13 @@ L90: L160: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - if (d__[i__] < 0.f) { + if (d__[i__] < 0.) { d__[i__] = -d__[i__]; /* Change sign of singular vectors, if desired */ if (*ncvt > 0) { - sscal_(ncvt, &c_b1290, &vt[i__ + vt_dim1], ldvt); + dscal_(ncvt, &c_b151, &vt[i__ + vt_dim1], ldvt); } } /* L170: */ @@ -65203,15 +1395,15 @@ L160: d__[isub] = d__[*n + 1 - i__]; d__[*n + 1 - i__] = smin; if (*ncvt > 0) { - sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + + dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + vt_dim1], ldvt); } if (*nru > 0) { - sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * + dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * u_dim1 + 1], &c__1); } if (*ncc > 0) { - sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + + dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + c_dim1], ldc); } } @@ -65225,7 +1417,7 @@ L200: *info = 0; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.f) { + if (e[i__] != 0.) 
{ ++(*info); } /* L210: */ @@ -65233,26 +1425,28 @@ L200: L220: return 0; -/* End of SBDSQR */ +/* End of DBDSQR */ -} /* sbdsqr_ */ +} /* dbdsqr_ */ -/* Subroutine */ int sgebak_(char *job, char *side, integer *n, integer *ilo, - integer *ihi, real *scale, integer *m, real *v, integer *ldv, integer - *info) +/* Subroutine */ int dgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * + ldv, integer *info) { /* System generated locals */ integer v_dim1, v_offset, i__1; /* Local variables */ static integer i__, k; - static real s; + static doublereal s; static integer ii; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); static logical leftv; - extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, - integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); static logical rightv; @@ -65266,9 +1460,9 @@ L220: Purpose ======= - SGEBAK forms the right or left eigenvectors of a real general matrix + DGEBAK forms the right or left eigenvectors of a real general matrix by backward transformation on the computed eigenvectors of the - balanced matrix output by SGEBAL. + balanced matrix output by DGEBAL. Arguments ========= @@ -65280,7 +1474,7 @@ L220: = 'S', do backward transformation for scaling only; = 'B', do backward transformations for both permutation and scaling. - JOB must be the same as the argument JOB supplied to SGEBAL. + JOB must be the same as the argument JOB supplied to DGEBAL. SIDE (input) CHARACTER*1 = 'R': V contains right eigenvectors; @@ -65291,19 +1485,19 @@ L220: ILO (input) INTEGER IHI (input) INTEGER - The integers ILO and IHI determined by SGEBAL. 
+ The integers ILO and IHI determined by DGEBAL. 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - SCALE (input) REAL array, dimension (N) + SCALE (input) DOUBLE PRECISION array, dimension (N) Details of the permutation and scaling factors, as returned - by SGEBAL. + by DGEBAL. M (input) INTEGER The number of columns of the matrix V. M >= 0. - V (input/output) REAL array, dimension (LDV,M) + V (input/output) DOUBLE PRECISION array, dimension (LDV,M) On entry, the matrix of right or left eigenvectors to be - transformed, as returned by SHSEIN or STREVC. + transformed, as returned by DHSEIN or DTREVC. On exit, V is overwritten by the transformed eigenvectors. LDV (input) INTEGER @@ -65337,9 +1531,9 @@ L220: *info = -2; } else if (*n < 0) { *info = -3; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { + } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -4; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -5; } else if (*m < 0) { *info = -7; @@ -65348,7 +1542,7 @@ L220: } if (*info != 0) { i__1 = -(*info); - xerbla_("SGEBAK", &i__1); + xerbla_("DGEBAK", &i__1); return 0; } @@ -65370,13 +1564,13 @@ L220: /* Backward balance */ - if ((lsame_(job, "S")) || (lsame_(job, "B"))) { + if (lsame_(job, "S") || lsame_(job, "B")) { if (rightv) { i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { s = scale[i__]; - sscal_(m, &s, &v[i__ + v_dim1], ldv); + dscal_(m, &s, &v[i__ + v_dim1], ldv); /* L10: */ } } @@ -65384,8 +1578,8 @@ L220: if (leftv) { i__1 = *ihi; for (i__ = *ilo; i__ <= i__1; ++i__) { - s = 1.f / scale[i__]; - sscal_(m, &s, &v[i__ + v_dim1], ldv); + s = 1. 
/ scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); /* L20: */ } } @@ -65400,7 +1594,7 @@ L220: */ L30: - if ((lsame_(job, "P")) || (lsame_(job, "B"))) { + if (lsame_(job, "P") || lsame_(job, "B")) { if (rightv) { i__1 = *n; for (ii = 1; ii <= i__1; ++ii) { @@ -65411,11 +1605,11 @@ L30: if (i__ < *ilo) { i__ = *ilo - ii; } - k = scale[i__]; + k = (integer) scale[i__]; if (k == i__) { goto L40; } - sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); L40: ; } @@ -65431,11 +1625,11 @@ L40: if (i__ < *ilo) { i__ = *ilo - ii; } - k = scale[i__]; + k = (integer) scale[i__]; if (k == i__) { goto L50; } - sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); L50: ; } @@ -65444,29 +1638,31 @@ L50: return 0; -/* End of SGEBAK */ +/* End of DGEBAK */ -} /* sgebak_ */ +} /* dgebak_ */ -/* Subroutine */ int sgebal_(char *job, integer *n, real *a, integer *lda, - integer *ilo, integer *ihi, real *scale, integer *info) +/* Subroutine */ int dgebal_(char *job, integer *n, doublereal *a, integer * + lda, integer *ilo, integer *ihi, doublereal *scale, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; - real r__1, r__2; + doublereal d__1, d__2; /* Local variables */ - static real c__, f, g; + static doublereal c__, f, g; static integer i__, j, k, l, m; - static real r__, s, ca, ra; + static doublereal r__, s, ca, ra; static integer ica, ira, iexc; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - sswap_(integer *, real *, integer *, real *, integer *); - static real sfmin1, sfmin2, sfmax1, sfmax2; - extern doublereal slamch_(char *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static doublereal sfmin1, sfmin2, sfmax1, 
sfmax2; + + extern integer idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer isamax_(integer *, real *, integer *); static logical noconv; @@ -65480,7 +1676,7 @@ L50: Purpose ======= - SGEBAL balances a general real matrix A. This involves, first, + DGEBAL balances a general real matrix A. This involves, first, permuting A by a similarity transformation to isolate eigenvalues in the first 1 to ILO-1 and last IHI+1 to N elements on the diagonal; and second, applying a diagonal similarity transformation @@ -65504,7 +1700,7 @@ L50: N (input) INTEGER The order of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the input matrix A. On exit, A is overwritten by the balanced matrix. If JOB = 'N', A is not referenced. @@ -65519,7 +1715,7 @@ L50: A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. If JOB = 'N' or 'S', ILO = 1 and IHI = N. - SCALE (output) REAL array, dimension (N) + SCALE (output) DOUBLE PRECISION array, dimension (N) Details of the permutations and scaling factors applied to A. If P(j) is the index of the row and column interchanged with row and column j and D(j) is the scaling factor @@ -65587,7 +1783,7 @@ L50: } if (*info != 0) { i__1 = -(*info); - xerbla_("SGEBAL", &i__1); + xerbla_("DGEBAL", &i__1); return 0; } @@ -65601,7 +1797,7 @@ L50: if (lsame_(job, "N")) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - scale[i__] = 1.f; + scale[i__] = 1.; /* L10: */ } goto L210; @@ -65618,14 +1814,14 @@ L50: /* Row and column exchange. 
*/ L20: - scale[m] = (real) j; + scale[m] = (doublereal) j; if (j == m) { goto L30; } - sswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); i__1 = *n - k + 1; - sswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); L30: switch (iexc) { @@ -65649,7 +1845,7 @@ L50: if (i__ == j) { goto L60; } - if (a[j + i__ * a_dim1] != 0.f) { + if (a[j + i__ * a_dim1] != 0.) { goto L70; } L60: @@ -65679,7 +1875,7 @@ L90: if (i__ == j) { goto L100; } - if (a[i__ + j * a_dim1] != 0.f) { + if (a[i__ + j * a_dim1] != 0.) { goto L110; } L100: @@ -65696,7 +1892,7 @@ L110: L120: i__1 = l; for (i__ = k; i__ <= i__1; ++i__) { - scale[i__] = 1.f; + scale[i__] = 1.; /* L130: */ } @@ -65710,99 +1906,97 @@ L120: Iterative loop for norm reduction */ - sfmin1 = slamch_("S") / slamch_("P"); - sfmax1 = 1.f / sfmin1; - sfmin2 = sfmin1 * 8.f; - sfmax2 = 1.f / sfmin2; + sfmin1 = SAFEMINIMUM / PRECISION; + sfmax1 = 1. / sfmin1; + sfmin2 = sfmin1 * 8.; + sfmax2 = 1. 
/ sfmin2; L140: noconv = FALSE_; i__1 = l; for (i__ = k; i__ <= i__1; ++i__) { - c__ = 0.f; - r__ = 0.f; + c__ = 0.; + r__ = 0.; i__2 = l; for (j = k; j <= i__2; ++j) { if (j == i__) { goto L150; } - c__ += (r__1 = a[j + i__ * a_dim1], dabs(r__1)); - r__ += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + c__ += (d__1 = a[j + i__ * a_dim1], abs(d__1)); + r__ += (d__1 = a[i__ + j * a_dim1], abs(d__1)); L150: ; } - ica = isamax_(&l, &a[i__ * a_dim1 + 1], &c__1); - ca = (r__1 = a[ica + i__ * a_dim1], dabs(r__1)); + ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); i__2 = *n - k + 1; - ira = isamax_(&i__2, &a[i__ + k * a_dim1], lda); - ra = (r__1 = a[i__ + (ira + k - 1) * a_dim1], dabs(r__1)); + ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); /* Guard against zero C or R due to underflow. */ - if ((c__ == 0.f) || (r__ == 0.f)) { + if (c__ == 0. || r__ == 0.) { goto L200; } - g = r__ / 8.f; - f = 1.f; + g = r__ / 8.; + f = 1.; s = c__ + r__; L160: /* Computing MAX */ - r__1 = max(f,c__); + d__1 = max(f,c__); /* Computing MIN */ - r__2 = min(r__,g); - if (((c__ >= g) || (dmax(r__1,ca) >= sfmax2)) || (dmin(r__2,ra) <= - sfmin2)) { + d__2 = min(r__,g); + if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { goto L170; } - f *= 8.f; - c__ *= 8.f; - ca *= 8.f; - r__ /= 8.f; - g /= 8.f; - ra /= 8.f; + f *= 8.; + c__ *= 8.; + ca *= 8.; + r__ /= 8.; + g /= 8.; + ra /= 8.; goto L160; L170: - g = c__ / 8.f; + g = c__ / 8.; L180: /* Computing MIN */ - r__1 = min(f,c__), r__1 = min(r__1,g); - if (((g < r__) || (dmax(r__,ra) >= sfmax2)) || (dmin(r__1,ca) <= - sfmin2)) { + d__1 = min(f,c__), d__1 = min(d__1,g); + if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { goto L190; } - f /= 8.f; - c__ /= 8.f; - g /= 8.f; - ca /= 8.f; - r__ *= 8.f; - ra *= 8.f; + f /= 8.; + c__ /= 8.; + g /= 8.; + ca /= 8.; + r__ *= 8.; + ra *= 8.; goto L180; /* Now balance. 
*/ L190: - if (c__ + r__ >= s * .95f) { + if (c__ + r__ >= s * .95) { goto L200; } - if (f < 1.f && scale[i__] < 1.f) { + if (f < 1. && scale[i__] < 1.) { if (f * scale[i__] <= sfmin1) { goto L200; } } - if (f > 1.f && scale[i__] > 1.f) { + if (f > 1. && scale[i__] > 1.) { if (scale[i__] >= sfmax1 / f) { goto L200; } } - g = 1.f / f; + g = 1. / f; scale[i__] *= f; noconv = TRUE_; i__2 = *n - k + 1; - sscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); - sscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); L200: ; @@ -65818,22 +2012,23 @@ L210: return 0; -/* End of SGEBAL */ +/* End of DGEBAL */ -} /* sgebal_ */ +} /* dgebal_ */ -/* Subroutine */ int sgebd2_(integer *m, integer *n, real *a, integer *lda, - real *d__, real *e, real *tauq, real *taup, real *work, integer *info) +/* Subroutine */ int dgebd2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer i__; - extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *), slarfg_(integer *, real *, real *, - integer *, real *); + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); /* @@ -65846,7 +2041,7 @@ L210: Purpose ======= - SGEBD2 reduces a real general m by n matrix A to upper or lower + DGEBD2 reduces a real general m by n matrix A to upper or lower bidiagonal form B by an orthogonal transformation: Q' * A * P = B. If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. 
@@ -65860,7 +2055,7 @@ L210: N (input) INTEGER The number of columns in the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the m by n general matrix to be reduced. On exit, if m >= n, the diagonal and the first superdiagonal are @@ -65882,24 +2077,24 @@ L210: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). - D (output) REAL array, dimension (min(M,N)) + D (output) DOUBLE PRECISION array, dimension (min(M,N)) The diagonal elements of the bidiagonal matrix B: D(i) = A(i,i). - E (output) REAL array, dimension (min(M,N)-1) + E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) The off-diagonal elements of the bidiagonal matrix B: if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - TAUQ (output) REAL array dimension (min(M,N)) + TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) The scalar factors of the elementary reflectors which represent the orthogonal matrix Q. See Further Details. - TAUP (output) REAL array, dimension (min(M,N)) + TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors which represent the orthogonal matrix P. See Further Details. - WORK (workspace) REAL array, dimension (max(M,N)) + WORK (workspace) DOUBLE PRECISION array, dimension (max(M,N)) INFO (output) INTEGER = 0: successful exit. 
@@ -65979,7 +2174,7 @@ L210: } if (*info < 0) { i__1 = -(*info); - xerbla_("SGEBD2", &i__1); + xerbla_("DGEBD2", &i__1); return 0; } @@ -65995,16 +2190,16 @@ L210: i__2 = *m - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; - slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1], &c__1, &tauq[i__]); d__[i__] = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.f; + a[i__ + i__ * a_dim1] = 1.; /* Apply H(i) to A(i:m,i+1:n) from the left */ i__2 = *m - i__ + 1; i__3 = *n - i__; - slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[ + dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[ i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); a[i__ + i__ * a_dim1] = d__[i__]; @@ -66018,21 +2213,21 @@ L210: i__2 = *n - i__; /* Computing MIN */ i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( i__3,*n) * a_dim1], lda, &taup[i__]); e[i__] = a[i__ + (i__ + 1) * a_dim1]; - a[i__ + (i__ + 1) * a_dim1] = 1.f; + a[i__ + (i__ + 1) * a_dim1] = 1.; /* Apply G(i) to A(i+1:m,i+1:n) from the right */ i__2 = *m - i__; i__3 = *n - i__; - slarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + dlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); a[i__ + (i__ + 1) * a_dim1] = e[i__]; } else { - taup[i__] = 0.f; + taup[i__] = 0.; } /* L10: */ } @@ -66048,10 +2243,10 @@ L210: i__2 = *n - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; - slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1], lda, &taup[i__]); d__[i__] = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.f; + a[i__ + i__ * a_dim1] = 1.; /* Apply G(i) to A(i+1:m,i:n) from the right */ @@ -66059,7 +2254,7 @@ L210: i__3 = *n - i__ + 1; /* Computing MIN */ i__4 = i__ + 1; - 
slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ + dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); a[i__ + i__ * a_dim1] = d__[i__]; @@ -66073,51 +2268,51 @@ L210: i__2 = *m - i__; /* Computing MIN */ i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1], &c__1, &tauq[i__]); e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.f; + a[i__ + 1 + i__ * a_dim1] = 1.; /* Apply H(i) to A(i+1:m,i+1:n) from the left */ i__2 = *m - i__; i__3 = *n - i__; - slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); a[i__ + 1 + i__ * a_dim1] = e[i__]; } else { - tauq[i__] = 0.f; + tauq[i__] = 0.; } /* L20: */ } } return 0; -/* End of SGEBD2 */ +/* End of DGEBD2 */ -} /* sgebd2_ */ +} /* dgebd2_ */ -/* Subroutine */ int sgebrd_(integer *m, integer *n, real *a, integer *lda, - real *d__, real *e, real *tauq, real *taup, real *work, integer * - lwork, integer *info) +/* Subroutine */ int dgebrd_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer i__, j, nb, nx; - static real ws; - static integer nbmin, iinfo; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer minmn; - extern /* Subroutine */ int sgebd2_(integer *, integer *, real *, integer - *, real *, real *, real *, real *, real *, integer *), slabrd_( - integer *, integer *, integer *, real *, integer *, real *, real * - , real 
*, real *, real *, integer *, real *, integer *), xerbla_( - char *, integer *); + static doublereal ws; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer nbmin, iinfo, minmn; + extern /* Subroutine */ int dgebd2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlabrd_(integer *, integer *, integer * + , doublereal *, integer *, doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *) + , xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static integer ldwrkx, ldwrky, lwkopt; @@ -66134,7 +2329,7 @@ L210: Purpose ======= - SGEBRD reduces a general real M-by-N matrix A to upper or lower + DGEBRD reduces a general real M-by-N matrix A to upper or lower bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. @@ -66148,7 +2343,7 @@ L210: N (input) INTEGER The number of columns in the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the M-by-N general matrix to be reduced. On exit, if m >= n, the diagonal and the first superdiagonal are @@ -66170,24 +2365,24 @@ L210: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). - D (output) REAL array, dimension (min(M,N)) + D (output) DOUBLE PRECISION array, dimension (min(M,N)) The diagonal elements of the bidiagonal matrix B: D(i) = A(i,i). 
- E (output) REAL array, dimension (min(M,N)-1) + E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) The off-diagonal elements of the bidiagonal matrix B: if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - TAUQ (output) REAL array dimension (min(M,N)) + TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) The scalar factors of the elementary reflectors which represent the orthogonal matrix Q. See Further Details. - TAUP (output) REAL array, dimension (min(M,N)) + TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors which represent the orthogonal matrix P. See Further Details. - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -66270,11 +2465,11 @@ L210: /* Function Body */ *info = 0; /* Computing MAX */ - i__1 = 1, i__2 = ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + i__1 = 1, i__2 = ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( ftnlen)6, (ftnlen)1); nb = max(i__1,i__2); lwkopt = (*m + *n) * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; @@ -66291,7 +2486,7 @@ L210: } if (*info < 0) { i__1 = -(*info); - xerbla_("SGEBRD", &i__1); + xerbla_("DGEBRD", &i__1); return 0; } else if (lquery) { return 0; @@ -66301,11 +2496,11 @@ L210: minmn = min(*m,*n); if (minmn == 0) { - work[1] = 1.f; + work[1] = 1.; return 0; } - ws = (real) max(*m,*n); + ws = (doublereal) max(*m,*n); ldwrkx = *m; ldwrky = *n; @@ -66316,22 +2511,22 @@ L210: Computing MAX */ - i__1 = nb, i__2 = ilaenv_(&c__3, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + i__1 = nb, i__2 = ilaenv_(&c__3, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( ftnlen)6, (ftnlen)1); nx = max(i__1,i__2); /* Determine when to switch from blocked to unblocked code. 
*/ if (nx < minmn) { - ws = (real) ((*m + *n) * nb); - if ((real) (*lwork) < ws) { + ws = (doublereal) ((*m + *n) * nb); + if ((doublereal) (*lwork) < ws) { /* Not enough work space for the optimal NB, consider using a smaller block size. */ - nbmin = ilaenv_(&c__2, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + nbmin = ilaenv_(&c__2, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( ftnlen)6, (ftnlen)1); if (*lwork >= (*m + *n) * nbmin) { nb = *lwork / (*m + *n); @@ -66357,7 +2552,7 @@ L210: i__3 = *m - i__ + 1; i__4 = *n - i__ + 1; - slabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx * nb + 1], &ldwrky); @@ -66368,14 +2563,14 @@ L210: i__3 = *m - i__ - nb + 1; i__4 = *n - i__ - nb + 1; - sgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b1290, &a[ + dgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b151, &a[ i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & - ldwrky, &c_b1011, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + ldwrky, &c_b15, &a[i__ + nb + (i__ + nb) * a_dim1], lda); i__3 = *m - i__ - nb + 1; i__4 = *n - i__ - nb + 1; - sgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b1290, & + dgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b151, & work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & - c_b1011, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + c_b15, &a[i__ + nb + (i__ + nb) * a_dim1], lda); /* Copy diagonal and off-diagonal elements of B back into A */ @@ -66401,74 +2596,82 @@ L210: i__2 = *m - i__ + 1; i__1 = *n - i__ + 1; - sgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & tauq[i__], &taup[i__], &work[1], &iinfo); work[1] = ws; return 0; -/* End of SGEBRD */ +/* End of DGEBRD */ -} /* sgebrd_ */ +} /* dgebrd_ */ -/* Subroutine */ int sgeev_(char *jobvl, char *jobvr, 
integer *n, real *a, - integer *lda, real *wr, real *wi, real *vl, integer *ldvl, real *vr, - integer *ldvr, real *work, integer *lwork, integer *info) +/* Subroutine */ int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, + integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, + integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3, i__4; - real r__1, r__2; + doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer i__, k; - static real r__, cs, sn; + static doublereal r__, cs, sn; static integer ihi; - static real scl; + static doublereal scl; static integer ilo; - static real dum[1], eps; + static doublereal dum[1], eps; static integer ibal; static char side[1]; static integer maxb; - static real anrm; - static integer ierr, itau, iwrk, nout; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *); - extern doublereal snrm2_(integer *, real *, integer *); + static doublereal anrm; + static integer ierr, itau; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer iwrk, nout; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - extern doublereal slapy2_(real *, real *); - extern /* Subroutine */ int slabad_(real *, real *); + extern doublereal dlapy2_(doublereal *, doublereal *); + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_( + char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + dgebal_(char *, 
integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *); static logical scalea; - static real cscale; - extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, - integer *, integer *, real *, integer *); - extern doublereal slamch_(char *), slange_(char *, integer *, - integer *, real *, integer *, real *); - extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *, integer *), xerbla_(char - *, integer *); + + static doublereal cscale; + extern doublereal dlange_(char *, integer *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), xerbla_(char *, integer *); + static logical select[1]; extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - static logical select[1]; - static real bignum; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *); - extern integer isamax_(integer *, real *, integer *); - extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, - integer *, real *, integer *), slartg_(real *, real *, - real *, real *, real *), sorghr_(integer *, integer *, integer *, - real *, integer *, real *, real *, integer *, integer *), shseqr_( - char *, char 
*, integer *, integer *, integer *, real *, integer * - , real *, real *, real *, integer *, real *, integer *, integer *), strevc_(char *, char *, logical *, integer *, - real *, integer *, real *, integer *, real *, integer *, integer * - , integer *, real *, integer *); + static doublereal bignum; + extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dhseqr_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), dtrevc_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *); static integer minwrk, maxwrk; static logical wantvl; - static real smlnum; + static doublereal smlnum; static integer hswork; static logical lquery, wantvr; @@ -66483,7 +2686,7 @@ L210: Purpose ======= - SGEEV computes for an N-by-N real nonsymmetric matrix A, the + DGEEV computes for an N-by-N real nonsymmetric matrix A, the eigenvalues and, optionally, the left and/or right eigenvectors. The right eigenvector v(j) of A satisfies @@ -66510,22 +2713,22 @@ L210: N (input) INTEGER The order of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the N-by-N matrix A. On exit, A has been overwritten. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). - WR (output) REAL array, dimension (N) - WI (output) REAL array, dimension (N) + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) WR and WI contain the real and imaginary parts, respectively, of the computed eigenvalues. Complex conjugate pairs of eigenvalues appear consecutively with the eigenvalue having the positive imaginary part first. 
- VL (output) REAL array, dimension (LDVL,N) + VL (output) DOUBLE PRECISION array, dimension (LDVL,N) If JOBVL = 'V', the left eigenvectors u(j) are stored one after another in the columns of VL, in the same order as their eigenvalues. @@ -66540,7 +2743,7 @@ L210: The leading dimension of the array VL. LDVL >= 1; if JOBVL = 'V', LDVL >= N. - VR (output) REAL array, dimension (LDVR,N) + VR (output) DOUBLE PRECISION array, dimension (LDVR,N) If JOBVR = 'V', the right eigenvectors v(j) are stored one after another in the columns of VR, in the same order as their eigenvalues. @@ -66555,7 +2758,7 @@ L210: The leading dimension of the array VR. LDVR >= 1; if JOBVR = 'V', LDVR >= N. - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -66609,9 +2812,9 @@ L210: *info = -3; } else if (*lda < max(1,*n)) { *info = -5; - } else if ((*ldvl < 1) || (wantvl && *ldvl < *n)) { + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { *info = -9; - } else if ((*ldvr < 1) || (wantvr && *ldvr < *n)) { + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { *info = -11; } @@ -66622,33 +2825,33 @@ L210: as well as the preferred amount for good performance. NB refers to the optimal block size for the immediately following subroutine, as returned by ILAENV. - HSWORK refers to the workspace preferred by SHSEQR, as + HSWORK refers to the workspace preferred by DHSEQR, as calculated below. HSWORK is computed assuming ILO=1 and IHI=N, the worst case.) */ minwrk = 1; - if (*info == 0 && ((*lwork >= 1) || (lquery))) { - maxwrk = ((*n) << (1)) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, - n, &c__0, (ftnlen)6, (ftnlen)1); + if (*info == 0 && (*lwork >= 1 || lquery)) { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, & + c__0, (ftnlen)6, (ftnlen)1); if (! wantvl && ! 
wantvr) { /* Computing MAX */ i__1 = 1, i__2 = *n * 3; minwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = ilaenv_(&c__8, "SHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) + i__1 = ilaenv_(&c__8, "DHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) 6, (ftnlen)2); maxb = max(i__1,2); /* Computing MIN Computing MAX */ - i__3 = 2, i__4 = ilaenv_(&c__4, "SHSEQR", "EN", n, &c__1, n, & + i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "EN", n, &c__1, n, & c_n1, (ftnlen)6, (ftnlen)2); i__1 = min(maxb,*n), i__2 = max(i__3,i__4); k = min(i__1,i__2); /* Computing MAX */ - i__1 = k * (k + 2), i__2 = (*n) << (1); + i__1 = k * (k + 2), i__2 = *n << 1; hswork = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + @@ -66656,43 +2859,43 @@ L210: maxwrk = max(i__1,i__2); } else { /* Computing MAX */ - i__1 = 1, i__2 = (*n) << (2); + i__1 = 1, i__2 = *n << 2; minwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + (*n - 1) * ilaenv_(&c__1, - "SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "DOR" + "GHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = ilaenv_(&c__8, "SHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) + i__1 = ilaenv_(&c__8, "DHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) 6, (ftnlen)2); maxb = max(i__1,2); /* Computing MIN Computing MAX */ - i__3 = 2, i__4 = ilaenv_(&c__4, "SHSEQR", "SV", n, &c__1, n, & + i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "SV", n, &c__1, n, & c_n1, (ftnlen)6, (ftnlen)2); i__1 = min(maxb,*n), i__2 = max(i__3,i__4); k = min(i__1,i__2); /* Computing MAX */ - i__1 = k * (k + 2), i__2 = (*n) << (1); + i__1 = k * (k + 2), i__2 = *n << 1; hswork = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + hswork; maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = (*n) << (2); + i__1 = maxwrk, i__2 = *n << 2; maxwrk = 
max(i__1,i__2); } - work[1] = (real) maxwrk; + work[1] = (doublereal) maxwrk; } if (*lwork < minwrk && ! lquery) { *info = -13; } if (*info != 0) { i__1 = -(*info); - xerbla_("SGEEV ", &i__1); + xerbla_("DGEEV ", &i__1); return 0; } else if (lquery) { return 0; @@ -66706,18 +2909,18 @@ L210: /* Get machine constants */ - eps = slamch_("P"); - smlnum = slamch_("S"); - bignum = 1.f / smlnum; - slabad_(&smlnum, &bignum); + eps = PRECISION; + smlnum = SAFEMINIMUM; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); smlnum = sqrt(smlnum) / eps; - bignum = 1.f / smlnum; + bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ - anrm = slange_("M", n, n, &a[a_offset], lda, dum); + anrm = dlange_("M", n, n, &a[a_offset], lda, dum); scalea = FALSE_; - if (anrm > 0.f && anrm < smlnum) { + if (anrm > 0. && anrm < smlnum) { scalea = TRUE_; cscale = smlnum; } else if (anrm > bignum) { @@ -66725,7 +2928,7 @@ L210: cscale = bignum; } if (scalea) { - slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & ierr); } @@ -66735,7 +2938,7 @@ L210: */ ibal = 1; - sgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + dgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); /* Reduce to upper Hessenberg form @@ -66745,7 +2948,7 @@ L210: itau = ibal + *n; iwrk = itau + *n; i__1 = *lwork - iwrk + 1; - sgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, &ierr); if (wantvl) { @@ -66756,7 +2959,7 @@ L210: */ *(unsigned char *)side = 'L'; - slacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) ; /* @@ -66765,7 +2968,7 @@ L210: */ i__1 = *lwork - iwrk + 1; - sorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], 
&work[iwrk], &i__1, &ierr); /* @@ -66775,7 +2978,7 @@ L210: iwrk = itau; i__1 = *lwork - iwrk + 1; - shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & vl[vl_offset], ldvl, &work[iwrk], &i__1, info); if (wantvr) { @@ -66786,7 +2989,7 @@ L210: */ *(unsigned char *)side = 'B'; - slacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); } } else if (wantvr) { @@ -66797,7 +3000,7 @@ L210: */ *(unsigned char *)side = 'R'; - slacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) ; /* @@ -66806,7 +3009,7 @@ L210: */ i__1 = *lwork - iwrk + 1; - sorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], &i__1, &ierr); /* @@ -66816,7 +3019,7 @@ L210: iwrk = itau; i__1 = *lwork - iwrk + 1; - shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & vr[vr_offset], ldvr, &work[iwrk], &i__1, info); } else { @@ -66828,24 +3031,24 @@ L210: iwrk = itau; i__1 = *lwork - iwrk + 1; - shseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + dhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & vr[vr_offset], ldvr, &work[iwrk], &i__1, info); } -/* If INFO > 0 from SHSEQR, then quit */ +/* If INFO > 0 from DHSEQR, then quit */ if (*info > 0) { goto L50; } - if ((wantvl) || (wantvr)) { + if (wantvl || wantvr) { /* Compute left and/or right eigenvectors (Workspace: need 4*N) */ - strevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + dtrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); } @@ -66856,37 +3059,37 @@ L210: (Workspace: need N) */ - sgebak_("B", "L", n, &ilo, &ihi, 
&work[ibal], n, &vl[vl_offset], ldvl, + dgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, &ierr); /* Normalize left eigenvectors and make largest component real */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - if (wi[i__] == 0.f) { - scl = 1.f / snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); - sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); - } else if (wi[i__] > 0.f) { - r__1 = snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); - r__2 = snrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); - scl = 1.f / slapy2_(&r__1, &r__2); - sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); - sscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); i__2 = *n; for (k = 1; k <= i__2; ++k) { /* Computing 2nd power */ - r__1 = vl[k + i__ * vl_dim1]; + d__1 = vl[k + i__ * vl_dim1]; /* Computing 2nd power */ - r__2 = vl[k + (i__ + 1) * vl_dim1]; - work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2; + d__2 = vl[k + (i__ + 1) * vl_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; /* L10: */ } - k = isamax_(n, &work[iwrk], &c__1); - slartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], &cs, &sn, &r__); - srot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * + drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * vl_dim1 + 1], &c__1, &cs, &sn); - vl[k + (i__ + 1) * vl_dim1] = 0.f; + vl[k + (i__ + 1) * vl_dim1] = 0.; } /* L20: */ } @@ -66899,37 +3102,37 @@ L210: (Workspace: need N) */ - sgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, + dgebak_("B", "R", 
n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, &ierr); /* Normalize right eigenvectors and make largest component real */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - if (wi[i__] == 0.f) { - scl = 1.f / snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); - sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); - } else if (wi[i__] > 0.f) { - r__1 = snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); - r__2 = snrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); - scl = 1.f / slapy2_(&r__1, &r__2); - sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); - sscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1. / dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); i__2 = *n; for (k = 1; k <= i__2; ++k) { /* Computing 2nd power */ - r__1 = vr[k + i__ * vr_dim1]; + d__1 = vr[k + i__ * vr_dim1]; /* Computing 2nd power */ - r__2 = vr[k + (i__ + 1) * vr_dim1]; - work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2; + d__2 = vr[k + (i__ + 1) * vr_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; /* L30: */ } - k = isamax_(n, &work[iwrk], &c__1); - slartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], &cs, &sn, &r__); - srot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * + drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * vr_dim1 + 1], &c__1, &cs, &sn); - vr[k + (i__ + 1) * vr_dim1] = 0.f; + vr[k + (i__ + 1) * vr_dim1] = 0.; } /* L40: */ } @@ -66943,44 +3146,45 @@ L50: /* Computing MAX */ i__3 = *n - *info; i__2 = max(i__3,1); - slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + + dlascl_("G", &c__0, &c__0, &cscale, 
&anrm, &i__1, &c__1, &wr[*info + 1], &i__2, &ierr); i__1 = *n - *info; /* Computing MAX */ i__3 = *n - *info; i__2 = max(i__3,1); - slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + 1], &i__2, &ierr); if (*info > 0) { i__1 = ilo - 1; - slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], n, &ierr); i__1 = ilo - 1; - slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], n, &ierr); } } - work[1] = (real) maxwrk; + work[1] = (doublereal) maxwrk; return 0; -/* End of SGEEV */ +/* End of DGEEV */ -} /* sgeev_ */ +} /* dgeev_ */ -/* Subroutine */ int sgehd2_(integer *n, integer *ilo, integer *ihi, real *a, - integer *lda, real *tau, real *work, integer *info) +/* Subroutine */ int dgehd2_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i__; - static real aii; - extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *), slarfg_(integer *, real *, real *, - integer *, real *); + static doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); /* @@ -66993,7 +3197,7 @@ L50: Purpose ======= - SGEHD2 reduces a real general matrix A to upper Hessenberg form H by + DGEHD2 reduces a real general matrix A to upper Hessenberg form H by an orthogonal similarity transformation: Q' * A * Q = H . 
Arguments @@ -67006,11 +3210,11 @@ L50: IHI (input) INTEGER It is assumed that A is already upper triangular in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to SGEBAL; otherwise they should be + set by a previous call to DGEBAL; otherwise they should be set to 1 and N respectively. See Further Details. 1 <= ILO <= IHI <= max(1,N). - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the n by n general matrix to be reduced. On exit, the upper triangle and the first subdiagonal of A are overwritten with the upper Hessenberg matrix H, and the @@ -67021,11 +3225,11 @@ L50: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). - TAU (output) REAL array, dimension (N-1) + TAU (output) DOUBLE PRECISION array, dimension (N-1) The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace) REAL array, dimension (N) + WORK (workspace) DOUBLE PRECISION array, dimension (N) INFO (output) INTEGER = 0: successful exit. 
@@ -67081,16 +3285,16 @@ L50: *info = 0; if (*n < 0) { *info = -1; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { + } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -2; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } if (*info != 0) { i__1 = -(*info); - xerbla_("SGEHD2", &i__1); + xerbla_("DGEHD2", &i__1); return 0; } @@ -67102,22 +3306,22 @@ L50: i__2 = *ihi - i__; /* Computing MIN */ i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[i__]); aii = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.f; + a[i__ + 1 + i__ * a_dim1] = 1.; /* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ i__2 = *ihi - i__; - slarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + dlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); /* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ i__2 = *ihi - i__; i__3 = *n - i__; - slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); a[i__ + 1 + i__ * a_dim1] = aii; @@ -67126,31 +3330,34 @@ L50: return 0; -/* End of SGEHD2 */ +/* End of DGEHD2 */ -} /* sgehd2_ */ +} /* dgehd2_ */ -/* Subroutine */ int sgehrd_(integer *n, integer *ilo, integer *ihi, real *a, - integer *lda, real *tau, real *work, integer *lwork, integer *info) +/* Subroutine */ int dgehrd_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer i__; - static real t[4160] /* was [65][64] */; + static doublereal t[4160] 
/* was [65][64] */; static integer ib; - static real ei; - static integer nb, nh, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *), sgehd2_(integer *, integer *, - integer *, real *, integer *, real *, real *, integer *), slarfb_( - char *, char *, char *, char *, integer *, integer *, integer *, - real *, integer *, real *, integer *, real *, integer *, real *, - integer *), slahrd_(integer *, - integer *, integer *, real *, integer *, real *, real *, integer * - , real *, integer *), xerbla_(char *, integer *); + static doublereal ei; + static integer nb, nh, nx, iws; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer nbmin, iinfo; + extern /* Subroutine */ int dgehd2_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), dlahrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static integer ldwork, lwkopt; @@ -67167,7 +3374,7 @@ L50: Purpose ======= - SGEHRD reduces a real general matrix A to upper Hessenberg form H by + DGEHRD reduces a real general matrix A to upper Hessenberg form H by an orthogonal similarity transformation: Q' * A * Q = H . Arguments @@ -67180,11 +3387,11 @@ L50: IHI (input) INTEGER It is assumed that A is already upper triangular in rows and columns 1:ILO-1 and IHI+1:N. 
ILO and IHI are normally - set by a previous call to SGEBAL; otherwise they should be + set by a previous call to DGEBAL; otherwise they should be set to 1 and N respectively. See Further Details. 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the N-by-N general matrix to be reduced. On exit, the upper triangle and the first subdiagonal of A are overwritten with the upper Hessenberg matrix H, and the @@ -67195,12 +3402,12 @@ L50: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). - TAU (output) REAL array, dimension (N-1) + TAU (output) DOUBLE PRECISION array, dimension (N-1) The scalar factors of the elementary reflectors (see Further Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to zero. - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -67266,17 +3473,17 @@ L50: /* Function Body */ *info = 0; /* Computing MIN */ - i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( ftnlen)6, (ftnlen)1); nb = min(i__1,i__2); lwkopt = *n * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*n < 0) { *info = -1; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { + } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -2; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; @@ -67285,7 +3492,7 @@ L50: } if (*info != 0) { i__1 = -(*info); - xerbla_("SGEHRD", &i__1); + xerbla_("DGEHRD", &i__1); return 0; } else if (lquery) { return 0; @@ -67295,12 +3502,12 @@ L50: i__1 = *ilo - 1; for (i__ = 1; i__ <= i__1; ++i__) { - tau[i__] = 0.f; + tau[i__] = 0.; /* L10: */ } i__1 = *n - 1; for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { - tau[i__] = 0.f; + tau[i__] = 0.; /* L20: */ } @@ -67308,7 +3515,7 @@ L50: nh = *ihi - *ilo + 1; if (nh <= 1) { - work[1] = 1.f; + work[1] = 1.; return 0; } @@ -67317,7 +3524,7 @@ L50: Computing MIN */ - i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( ftnlen)6, (ftnlen)1); nb = min(i__1,i__2); nbmin = 2; @@ -67330,7 +3537,7 @@ L50: Computing MAX */ - i__1 = nb, i__2 = ilaenv_(&c__3, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + i__1 = nb, i__2 = ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( ftnlen)6, (ftnlen)1); nx = max(i__1,i__2); if (nx < nh) { @@ -67347,7 +3554,7 @@ L50: Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "SGEHRD", " ", n, ilo, ihi, & + i__1 = 2, i__2 = ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, & c_n1, (ftnlen)6, (ftnlen)1); nbmin = max(i__1,i__2); if (*lwork >= *n * nbmin) { @@ -67360,7 +3567,7 @@ L50: } ldwork = *n; - if ((nb < nbmin) 
|| (nb >= nh)) { + if (nb < nbmin || nb >= nh) { /* Use unblocked code below */ @@ -67383,7 +3590,7 @@ L50: which performs the reduction, and also the matrix Y = A*V*T */ - slahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + dlahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & c__65, &work[1], &ldwork); /* @@ -67393,11 +3600,11 @@ L50: */ ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; - a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.f; + a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.; i__3 = *ihi - i__ - ib + 1; - sgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b1290, & + dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b151, & work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & - c_b1011, &a[(i__ + ib) * a_dim1 + 1], lda); + c_b15, &a[(i__ + ib) * a_dim1 + 1], lda); a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; /* @@ -67407,7 +3614,7 @@ L50: i__3 = *ihi - i__; i__4 = *n - i__ - ib + 1; - slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); /* L30: */ @@ -67416,28 +3623,28 @@ L50: /* Use unblocked code to reduce the rest of the matrix */ - sgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); - work[1] = (real) iws; + dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1] = (doublereal) iws; return 0; -/* End of SGEHRD */ +/* End of DGEHRD */ -} /* sgehrd_ */ +} /* dgehrd_ */ -/* Subroutine */ int sgelq2_(integer *m, integer *n, real *a, integer *lda, - real *tau, real *work, integer *info) +/* Subroutine */ int dgelq2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, k; - static real aii; - extern /* Subroutine */ int slarf_(char *, 
integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *), slarfg_(integer *, real *, real *, - integer *, real *); + static doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); /* @@ -67450,7 +3657,7 @@ L50: Purpose ======= - SGELQ2 computes an LQ factorization of a real m by n matrix A: + DGELQ2 computes an LQ factorization of a real m by n matrix A: A = L * Q. Arguments @@ -67462,7 +3669,7 @@ L50: N (input) INTEGER The number of columns of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the m by n matrix A. On exit, the elements on and below the diagonal of the array contain the m by min(m,n) lower trapezoidal matrix L (L is @@ -67473,11 +3680,11 @@ L50: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). - TAU (output) REAL array, dimension (min(M,N)) + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). 
- WORK (workspace) REAL array, dimension (M) + WORK (workspace) DOUBLE PRECISION array, dimension (M) INFO (output) INTEGER = 0: successful exit @@ -67522,7 +3729,7 @@ L50: } if (*info != 0) { i__1 = -(*info); - xerbla_("SGELQ2", &i__1); + xerbla_("DGELQ2", &i__1); return 0; } @@ -67536,17 +3743,17 @@ L50: i__2 = *n - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; - slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1] + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1] , lda, &tau[i__]); if (i__ < *m) { /* Apply H(i) to A(i+1:m,i:n) from the right */ aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.f; + a[i__ + i__ * a_dim1] = 1.; i__2 = *m - i__; i__3 = *n - i__ + 1; - slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); a[i__ + i__ * a_dim1] = aii; } @@ -67554,26 +3761,26 @@ L50: } return 0; -/* End of SGELQ2 */ +/* End of DGELQ2 */ -} /* sgelq2_ */ +} /* dgelq2_ */ -/* Subroutine */ int sgelqf_(integer *m, integer *n, real *a, integer *lda, - real *tau, real *work, integer *lwork, integer *info) +/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int sgelq2_(integer *, integer *, real *, integer - *, real *, real *, integer *), slarfb_(char *, char *, char *, - char *, integer *, integer *, integer *, real *, integer *, real * - , integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + 
doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); static integer ldwork, lwkopt; static logical lquery; @@ -67588,7 +3795,7 @@ L50: Purpose ======= - SGELQF computes an LQ factorization of a real M-by-N matrix A: + DGELQF computes an LQ factorization of a real M-by-N matrix A: A = L * Q. Arguments @@ -67600,7 +3807,7 @@ L50: N (input) INTEGER The number of columns of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, the elements on and below the diagonal of the array contain the m-by-min(m,n) lower trapezoidal matrix L (L is @@ -67611,11 +3818,11 @@ L50: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). - TAU (output) REAL array, dimension (min(M,N)) + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -67662,10 +3869,10 @@ L50: /* Function Body */ *info = 0; - nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) 1); lwkopt = *m * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; @@ -67678,7 +3885,7 @@ L50: } if (*info != 0) { i__1 = -(*info); - xerbla_("SGELQF", &i__1); + xerbla_("DGELQF", &i__1); return 0; } else if (lquery) { return 0; @@ -67688,7 +3895,7 @@ L50: k = min(*m,*n); if (k == 0) { - work[1] = 1.f; + work[1] = 1.; return 0; } @@ -67702,7 +3909,7 @@ L50: Computing MAX */ - i__1 = 0, i__2 = ilaenv_(&c__3, "SGELQF", " ", m, n, &c_n1, &c_n1, ( + i__1 = 0, i__2 = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1, ( ftnlen)6, (ftnlen)1); nx = max(i__1,i__2); if (nx < k) { @@ -67720,7 +3927,7 @@ L50: nb = *lwork / ldwork; /* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "SGELQF", " ", m, n, &c_n1, & + i__1 = 2, i__2 = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, & c_n1, (ftnlen)6, (ftnlen)1); nbmin = max(i__1,i__2); } @@ -67744,7 +3951,7 @@ L50: */ i__3 = *n - i__ + 1; - sgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ 1], &iinfo); if (i__ + ib <= *m) { @@ -67754,14 +3961,14 @@ L50: */ i__3 = *n - i__ + 1; - slarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H to A(i+ib:m,i:n) from the right */ i__3 = *m - i__ - ib + 1; i__4 = *n - i__ + 1; - slarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + 1], &ldwork); @@ -67777,20 +3984,21 @@ L50: if (i__ <= k) { i__2 = *m - i__ + 1; i__1 = *n - i__ + 
1; - sgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] , &iinfo); } - work[1] = (real) iws; + work[1] = (doublereal) iws; return 0; -/* End of SGELQF */ +/* End of DGELQF */ -} /* sgelqf_ */ +} /* dgelqf_ */ -/* Subroutine */ int sgelsd_(integer *m, integer *n, integer *nrhs, real *a, - integer *lda, real *b, integer *ldb, real *s, real *rcond, integer * - rank, real *work, integer *lwork, integer *iwork, integer *info) +/* Subroutine */ int dgelsd_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; @@ -67800,45 +4008,46 @@ L50: /* Local variables */ static integer ie, il, mm; - static real eps, anrm, bnrm; + static doublereal eps, anrm, bnrm; static integer itau, nlvl, iascl, ibscl; - static real sfmin; + static doublereal sfmin; static integer minmn, maxmn, itaup, itauq, mnthr, nwork; - extern /* Subroutine */ int slabad_(real *, real *), sgebrd_(integer *, - integer *, real *, integer *, real *, real *, real *, real *, - real *, integer *, integer *); - extern doublereal slamch_(char *), slange_(char *, integer *, - integer *, real *, integer *, real *); - extern /* Subroutine */ int xerbla_(char *, integer *); + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebrd_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *); + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlalsd_(char *, integer *, 
integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *), dlascl_(char *, + integer *, integer *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *), dgeqrf_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, + integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - static real bignum; - extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer - *, real *, real *, integer *, integer *), slalsd_(char *, integer - *, integer *, integer *, real *, real *, real *, integer *, real * - , integer *, real *, integer *, integer *), slascl_(char * - , integer *, integer *, real *, real *, integer *, integer *, - real *, integer *, integer *); + static doublereal bignum; + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); static integer wlalsd; - extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer - *, real *, real *, integer *, integer *), slacpy_(char *, integer - *, integer *, real *, integer *, real *, integer *), - slaset_(char *, integer *, integer *, real *, real *, real *, - integer *); + extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); static integer ldwork; - extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, - integer *, integer *, real *, integer *, real *, real *, integer * - , real *, 
integer *, integer *); + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); static integer minwrk, maxwrk; - static real smlnum; - extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *, integer *); + static doublereal smlnum; static logical lquery; static integer smlsiz; - extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *, integer *); /* @@ -67851,7 +4060,7 @@ L50: Purpose ======= - SGELSD computes the minimum-norm solution to a real linear least + DGELSD computes the minimum-norm solution to a real linear least squares problem: minimize 2-norm(| b - A*x |) using the singular value decomposition (SVD) of A. A is an M-by-N @@ -67894,14 +4103,14 @@ L50: The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. - A (input) REAL array, dimension (LDA,N) + A (input) DOUBLE PRECISION array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, A has been destroyed. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). - B (input/output) REAL array, dimension (LDB,NRHS) + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the M-by-NRHS right hand side matrix B. On exit, B is overwritten by the N-by-NRHS solution matrix X. If m >= n and RANK = n, the residual @@ -67911,11 +4120,11 @@ L50: LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,max(M,N)). - S (output) REAL array, dimension (min(M,N)) + S (output) DOUBLE PRECISION array, dimension (min(M,N)) The singular values of A in decreasing order. The condition number of A in the 2-norm = S(1)/S(min(m,n)). 
- RCOND (input) REAL + RCOND (input) DOUBLE PRECISION RCOND is used to determine the effective rank of A. Singular values S(i) <= RCOND*S(1) are treated as zero. If RCOND < 0, machine precision is used instead. @@ -67924,7 +4133,7 @@ L50: The effective rank of A, i.e., the number of singular values which are greater than RCOND*S(1). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -67946,7 +4155,6 @@ L50: this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. - IWORK (workspace) INTEGER array, dimension (LIWORK) LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, where MINMN = MIN( M,N ). @@ -67987,7 +4195,7 @@ L50: *info = 0; minmn = min(*m,*n); maxmn = max(*m,*n); - mnthr = ilaenv_(&c__6, "SGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( + mnthr = ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( ftnlen)1); lquery = *lwork == -1; if (*m < 0) { @@ -68002,7 +4210,7 @@ L50: *info = -7; } - smlsiz = ilaenv_(&c__9, "SGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( + smlsiz = ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( ftnlen)6, (ftnlen)1); /* @@ -68017,7 +4225,8 @@ L50: minwrk = 1; minmn = max(1,minmn); /* Computing MAX */ - i__1 = (integer) (log((real) minmn / (real) (smlsiz + 1)) / log(2.f)) + 1; + i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) / + log(2.)) + 1; nlvl = max(i__1,0); if (*info == 0) { @@ -68029,11 +4238,11 @@ L50: mm = *n; /* Computing MAX */ - i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "SORMQR", "LT", + i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "DORMQR", "LT", m, nrhs, n, &c_n1, 
(ftnlen)6, (ftnlen)2); maxwrk = max(i__1,i__2); } @@ -68044,21 +4253,21 @@ L50: Computing MAX */ - i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "SGEBRD" + i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "DGEBRD" , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "SORMBR", + i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "DORMBR", "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "SORMBR", + i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "DORMBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing 2nd power */ i__1 = smlsiz + 1; - wlalsd = *n * 9 + ((*n) << (1)) * smlsiz + ((*n) << (3)) * nlvl + - *n * *nrhs + i__1 * i__1; + wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * * + nrhs + i__1 * i__1; /* Computing MAX */ i__1 = maxwrk, i__2 = *n * 3 + wlalsd; maxwrk = max(i__1,i__2); @@ -68070,8 +4279,8 @@ L50: if (*n > *m) { /* Computing 2nd power */ i__1 = smlsiz + 1; - wlalsd = *m * 9 + ((*m) << (1)) * smlsiz + ((*m) << (3)) * nlvl + - *m * *nrhs + i__1 * i__1; + wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * * + nrhs + i__1 * i__1; if (*n >= mnthr) { /* @@ -68079,21 +4288,21 @@ L50: than rows. 
*/ - maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, + maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + ((*m) << (1)) - * ilaenv_(&c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, ( + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, ( ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + *nrhs * - ilaenv_(&c__1, "SORMBR", "QLT", m, nrhs, m, &c_n1, ( - ftnlen)6, (ftnlen)3); + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& + c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + (*m - 1) * - ilaenv_(&c__1, "SORMBR", "PLN", m, nrhs, m, &c_n1, ( + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * + ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1, ( ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); if (*nrhs > 1) { @@ -68102,28 +4311,28 @@ L50: maxwrk = max(i__1,i__2); } else { /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (1)); + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); maxwrk = max(i__1,i__2); } /* Computing MAX */ - i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "SORMLQ", + i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "DORMLQ", "LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + wlalsd; + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; maxwrk = max(i__1,i__2); } else { /* Path 2 - remaining underdetermined cases. 
*/ - maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "SGEBRD", " ", m, + maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "SORMBR" + i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "DORMBR" , "QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR", + i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR", "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ @@ -68136,7 +4345,7 @@ L50: minwrk = max(i__1,i__2); } minwrk = min(minwrk,maxwrk); - work[1] = (real) maxwrk; + work[1] = (doublereal) maxwrk; if (*lwork < minwrk && ! lquery) { *info = -12; } @@ -68144,7 +4353,7 @@ L50: if (*info != 0) { i__1 = -(*info); - xerbla_("SGELSD", &i__1); + xerbla_("DGELSD", &i__1); return 0; } else if (lquery) { goto L10; @@ -68152,65 +4361,64 @@ L50: /* Quick return if possible. */ - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { *rank = 0; return 0; } /* Get machine parameters. */ - eps = slamch_("P"); - sfmin = slamch_("S"); + eps = PRECISION; + sfmin = SAFEMINIMUM; smlnum = sfmin / eps; - bignum = 1.f / smlnum; - slabad_(&smlnum, &bignum); + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); /* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ - anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]); + anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]); iascl = 0; - if (anrm > 0.f && anrm < smlnum) { + if (anrm > 0. && anrm < smlnum) { /* Scale matrix norm up to SMLNUM. */ - slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info); iascl = 1; } else if (anrm > bignum) { /* Scale matrix norm down to BIGNUM. 
*/ - slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info); iascl = 2; - } else if (anrm == 0.f) { + } else if (anrm == 0.) { /* Matrix all zero. Return zero solution. */ i__1 = max(*m,*n); - slaset_("F", &i__1, nrhs, &c_b320, &c_b320, &b[b_offset], ldb); - slaset_("F", &minmn, &c__1, &c_b320, &c_b320, &s[1], &c__1) - ; + dlaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); + dlaset_("F", &minmn, &c__1, &c_b29, &c_b29, &s[1], &c__1); *rank = 0; goto L10; } /* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ - bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); ibscl = 0; - if (bnrm > 0.f && bnrm < smlnum) { + if (bnrm > 0. && bnrm < smlnum) { /* Scale matrix norm up to SMLNUM. */ - slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info); ibscl = 1; } else if (bnrm > bignum) { /* Scale matrix norm down to BIGNUM. */ - slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info); ibscl = 2; } @@ -68219,7 +4427,7 @@ L50: if (*m < *n) { i__1 = *n - *m; - slaset_("F", &i__1, nrhs, &c_b320, &c_b320, &b[*m + 1 + b_dim1], ldb); + dlaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[*m + 1 + b_dim1], ldb); } /* Overdetermined case. */ @@ -68243,7 +4451,7 @@ L50: */ i__1 = *lwork - nwork + 1; - sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info); /* @@ -68252,7 +4460,7 @@ L50: */ i__1 = *lwork - nwork + 1; - sormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ b_offset], ldb, &work[nwork], &i__1, info); /* Zero out below R. 
*/ @@ -68260,7 +4468,7 @@ L50: if (*n > 1) { i__1 = *n - 1; i__2 = *n - 1; - slaset_("L", &i__1, &i__2, &c_b320, &c_b320, &a[a_dim1 + 2], + dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &a[a_dim1 + 2], lda); } } @@ -68276,7 +4484,7 @@ L50: */ i__1 = *lwork - nwork + 1; - sgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[nwork], &i__1, info); /* @@ -68285,12 +4493,12 @@ L50: */ i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); /* Solve the bidiagonal least squares problem. */ - slalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, + dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank, &work[nwork], &iwork[1], info); if (*info != 0) { goto L10; @@ -68299,15 +4507,14 @@ L50: /* Multiply B by right bidiagonalizing vectors of R. 
*/ i__1 = *lwork - nwork + 1; - sormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & + dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & b[b_offset], ldb, &work[nwork], &i__1, info); } else /* if(complicated condition) */ { /* Computing MAX */ - i__1 = *m, i__2 = ((*m) << (1)) - 4, i__1 = max(i__1,i__2), i__1 = - max(i__1,*nrhs), i__2 = *n - *m * 3; - if (*n >= mnthr && *lwork >= ((*m) << (2)) + *m * *m + max(i__1,i__2)) - { + i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( + i__1,*nrhs), i__2 = *n - *m * 3; + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) { /* Path 2a - underdetermined, with many more columns than rows @@ -68319,10 +4526,10 @@ L50: Computing MAX Computing MAX */ - i__3 = *m, i__4 = ((*m) << (1)) - 4, i__3 = max(i__3,i__4), i__3 = - max(i__3,*nrhs), i__4 = *n - *m * 3; - i__1 = ((*m) << (2)) + *m * *lda + max(i__3,i__4), i__2 = *m * * - lda + *m + *m * *nrhs; + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + + *m + *m * *nrhs; if (*lwork >= max(i__1,i__2)) { ldwork = *lda; } @@ -68335,16 +4542,16 @@ L50: */ i__1 = *lwork - nwork + 1; - sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, info); il = nwork; /* Copy L to WORK(IL), zeroing out above its diagonal. 
*/ - slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); i__1 = *m - 1; i__2 = *m - 1; - slaset_("U", &i__1, &i__2, &c_b320, &c_b320, &work[il + ldwork], & + dlaset_("U", &i__1, &i__2, &c_b29, &c_b29, &work[il + ldwork], & ldwork); ie = il + ldwork * *m; itauq = ie + *m; @@ -68357,7 +4564,7 @@ L50: */ i__1 = *lwork - nwork + 1; - sgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], + dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], &work[itaup], &work[nwork], &i__1, info); /* @@ -68366,12 +4573,12 @@ L50: */ i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ + dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); /* Solve the bidiagonal least squares problem. */ - slalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank, &work[nwork], &iwork[1], info); if (*info != 0) { goto L10; @@ -68380,13 +4587,13 @@ L50: /* Multiply B by right bidiagonalizing vectors of L. */ i__1 = *lwork - nwork + 1; - sormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ + dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); /* Zero out below first M rows of B. 
*/ i__1 = *n - *m; - slaset_("F", &i__1, nrhs, &c_b320, &c_b320, &b[*m + 1 + b_dim1], + dlaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[*m + 1 + b_dim1], ldb); nwork = itau + *m; @@ -68396,7 +4603,7 @@ L50: */ i__1 = *lwork - nwork + 1; - sormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ b_offset], ldb, &work[nwork], &i__1, info); } else { @@ -68414,7 +4621,7 @@ L50: */ i__1 = *lwork - nwork + 1; - sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[nwork], &i__1, info); /* @@ -68423,12 +4630,12 @@ L50: */ i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] + dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] , &b[b_offset], ldb, &work[nwork], &i__1, info); /* Solve the bidiagonal least squares problem. */ - slalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], ldb, rcond, rank, &work[nwork], &iwork[1], info); if (*info != 0) { goto L10; @@ -68437,7 +4644,7 @@ L50: /* Multiply B by right bidiagonalizing vectors of A. */ i__1 = *lwork - nwork + 1; - sormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] + dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] , &b[b_offset], ldb, &work[nwork], &i__1, info); } @@ -68446,45 +4653,45 @@ L50: /* Undo scaling. 
*/ if (iascl == 1) { - slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info); - slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & minmn, info); } else if (iascl == 2) { - slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info); - slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & minmn, info); } if (ibscl == 1) { - slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } else if (ibscl == 2) { - slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } L10: - work[1] = (real) maxwrk; + work[1] = (doublereal) maxwrk; return 0; -/* End of SGELSD */ +/* End of DGELSD */ -} /* sgelsd_ */ +} /* dgelsd_ */ -/* Subroutine */ int sgeqr2_(integer *m, integer *n, real *a, integer *lda, - real *tau, real *work, integer *info) +/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, k; - static real aii; - extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *), slarfg_(integer *, real *, real *, - integer *, real *); + static doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, 
doublereal *), xerbla_(char *, integer *); /* @@ -68497,7 +4704,7 @@ L10: Purpose ======= - SGEQR2 computes a QR factorization of a real m by n matrix A: + DGEQR2 computes a QR factorization of a real m by n matrix A: A = Q * R. Arguments @@ -68509,7 +4716,7 @@ L10: N (input) INTEGER The number of columns of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the m by n matrix A. On exit, the elements on and above the diagonal of the array contain the min(m,n) by n upper trapezoidal matrix R (R is @@ -68520,11 +4727,11 @@ L10: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). - TAU (output) REAL array, dimension (min(M,N)) + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace) REAL array, dimension (N) + WORK (workspace) DOUBLE PRECISION array, dimension (N) INFO (output) INTEGER = 0: successful exit @@ -68569,7 +4776,7 @@ L10: } if (*info != 0) { i__1 = -(*info); - xerbla_("SGEQR2", &i__1); + xerbla_("DGEQR2", &i__1); return 0; } @@ -68583,17 +4790,17 @@ L10: i__2 = *m - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; - slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] , &c__1, &tau[i__]); if (i__ < *n) { /* Apply H(i) to A(i:m,i+1:n) from the left */ aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.f; + a[i__ + i__ * a_dim1] = 1.; i__2 = *m - i__ + 1; i__3 = *n - i__; - slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); a[i__ + i__ * a_dim1] = aii; } @@ -68601,26 +4808,26 @@ L10: } return 0; -/* End of SGEQR2 */ +/* End of DGEQR2 */ -} /* sgeqr2_ */ +} /* dgeqr2_ */ -/* Subroutine */ int sgeqrf_(integer *m, integer *n, real *a, 
integer *lda, - real *tau, real *work, integer *lwork, integer *info) +/* Subroutine */ int dgeqrf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer - *, real *, real *, integer *), slarfb_(char *, char *, char *, - char *, integer *, integer *, integer *, real *, integer *, real * - , integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int dgeqr2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); static integer ldwork, lwkopt; static logical lquery; @@ -68635,7 +4842,7 @@ L10: Purpose ======= - SGEQRF computes a QR factorization of a real M-by-N matrix A: + DGEQRF computes a QR factorization of a real M-by-N matrix A: A = Q * R. Arguments @@ -68647,7 +4854,7 @@ L10: N (input) INTEGER The number of columns of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the M-by-N matrix A. 
On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is @@ -68659,11 +4866,11 @@ L10: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). - TAU (output) REAL array, dimension (min(M,N)) + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -68710,10 +4917,10 @@ L10: /* Function Body */ *info = 0; - nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) 1); lwkopt = *n * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; @@ -68726,7 +4933,7 @@ L10: } if (*info != 0) { i__1 = -(*info); - xerbla_("SGEQRF", &i__1); + xerbla_("DGEQRF", &i__1); return 0; } else if (lquery) { return 0; @@ -68736,7 +4943,7 @@ L10: k = min(*m,*n); if (k == 0) { - work[1] = 1.f; + work[1] = 1.; return 0; } @@ -68750,7 +4957,7 @@ L10: Computing MAX */ - i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, ( + i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, ( ftnlen)6, (ftnlen)1); nx = max(i__1,i__2); if (nx < k) { @@ -68768,7 +4975,7 @@ L10: nb = *lwork / ldwork; /* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, & + i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & c_n1, (ftnlen)6, (ftnlen)1); nbmin = max(i__1,i__2); } @@ -68792,7 +4999,7 @@ L10: */ i__3 = *m - i__ + 1; - sgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ 1], &iinfo); if (i__ + ib <= *n) { @@ -68802,14 +5009,14 @@ L10: */ i__3 = *m - i__ + 1; - 
slarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H' to A(i:m,i+ib:n) from the left */ i__3 = *m - i__ + 1; i__4 = *n - i__ - ib + 1; - slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + 1], &ldwork); @@ -68825,20 +5032,21 @@ L10: if (i__ <= k) { i__2 = *m - i__ + 1; i__1 = *n - i__ + 1; - sgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] , &iinfo); } - work[1] = (real) iws; + work[1] = (doublereal) iws; return 0; -/* End of SGEQRF */ +/* End of DGEQRF */ -} /* sgeqrf_ */ +} /* dgeqrf_ */ -/* Subroutine */ int sgesdd_(char *jobz, integer *m, integer *n, real *a, - integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, - real *work, integer *lwork, integer *iwork, integer *info) +/* Subroutine */ int dgesdd_(char *jobz, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *s, doublereal *u, integer *ldu, + doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, + integer *iwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, @@ -68849,53 +5057,50 @@ L10: /* Local variables */ static integer i__, ie, il, ir, iu, blk; - static real dum[1], eps; + static doublereal dum[1], eps; static integer ivt, iscl; - static real anrm; + static doublereal anrm; static integer idum[1], ierr, itau; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); - static integer chunk; - extern /* Subroutine */ int 
sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer minmn, wrkbl, itaup, itauq, mnthr; + static integer chunk, minmn, wrkbl, itaup, itauq, mnthr; static logical wntqa; static integer nwork; static logical wntqn, wntqo, wntqs; + extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), dgebrd_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); static integer bdspac; - extern /* Subroutine */ int sbdsdc_(char *, char *, integer *, real *, - real *, real *, integer *, real *, integer *, real *, integer *, - real *, integer *, integer *), sgebrd_(integer *, - integer *, real *, integer *, real *, real *, real *, real *, - real *, integer *, integer *); - extern doublereal slamch_(char *), slange_(char *, integer *, - integer *, real *, integer *, real *); - extern /* Subroutine */ int xerbla_(char *, integer *); + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), dlacpy_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *), dorgbr_(char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer 
*); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - static real bignum; - extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer - *, real *, real *, integer *, integer *), slascl_(char *, integer - *, integer *, real *, real *, integer *, integer *, real *, - integer *, integer *), sgeqrf_(integer *, integer *, real - *, integer *, real *, real *, integer *, integer *), slacpy_(char - *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, - real *, integer *), sorgbr_(char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, integer * - ); - static integer ldwrkl; - extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, - integer *, integer *, real *, integer *, real *, real *, integer * - , real *, integer *, integer *); - static integer ldwrkr, minwrk, ldwrku, maxwrk; - extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *, integer *); - static integer ldwkvt; - static real smlnum; - static logical wntqas; - extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *, integer *); - static logical lquery; + static doublereal bignum; + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + static integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; + static doublereal smlnum; + static logical wntqas, lquery; /* @@ -68908,7 +5113,7 @@ L10: Purpose ======= - SGESDD computes the 
singular value decomposition (SVD) of a real + DGESDD computes the singular value decomposition (SVD) of a real M-by-N matrix A, optionally computing the left and right singular vectors. If singular vectors are desired, it uses a divide-and-conquer algorithm. @@ -68957,7 +5162,7 @@ L10: N (input) INTEGER The number of columns of the input matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, if JOBZ = 'O', A is overwritten with the first N columns @@ -68971,10 +5176,10 @@ L10: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). - S (output) REAL array, dimension (min(M,N)) + S (output) DOUBLE PRECISION array, dimension (min(M,N)) The singular values of A, sorted so that S(i) >= S(i+1). - U (output) REAL array, dimension (LDU,UCOL) + U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; UCOL = min(M,N) if JOBZ = 'S'. If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M @@ -68987,7 +5192,7 @@ L10: The leading dimension of the array U. LDU >= 1; if JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. - VT (output) REAL array, dimension (LDVT,N) + VT (output) DOUBLE PRECISION array, dimension (LDVT,N) If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the N-by-N orthogonal matrix V**T; if JOBZ = 'S', VT contains the first min(M,N) rows of @@ -68999,7 +5204,7 @@ L10: JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; if JOBZ = 'S', LDVT >= min(M,N). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK; LWORK (input) INTEGER @@ -69021,7 +5226,7 @@ L10: INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: SBDSDC did not converge, updating process failed. 
+ > 0: DBDSDC did not converge, updating process failed. Further Details =============== @@ -69053,17 +5258,17 @@ L10: /* Function Body */ *info = 0; minmn = min(*m,*n); - mnthr = (integer) (minmn * 11.f / 6.f); + mnthr = (integer) (minmn * 11. / 6.); wntqa = lsame_(jobz, "A"); wntqs = lsame_(jobz, "S"); - wntqas = (wntqa) || (wntqs); + wntqas = wntqa || wntqs; wntqo = lsame_(jobz, "O"); wntqn = lsame_(jobz, "N"); minwrk = 1; maxwrk = 1; lquery = *lwork == -1; - if (! ((((wntqa) || (wntqs)) || (wntqo)) || (wntqn))) { + if (! (wntqa || wntqs || wntqo || wntqn)) { *info = -1; } else if (*m < 0) { *info = -2; @@ -69071,11 +5276,11 @@ L10: *info = -3; } else if (*lda < max(1,*m)) { *info = -5; - } else if (((*ldu < 1) || (wntqas && *ldu < *m)) || (wntqo && *m < *n && * - ldu < *m)) { + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { *info = -8; - } else if ((((*ldvt < 1) || (wntqa && *ldvt < *n)) || (wntqs && *ldvt < - minmn)) || (wntqo && *m >= *n && *ldvt < *n)) { + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { *info = -10; } @@ -69091,24 +5296,24 @@ L10: if (*info == 0 && *m > 0 && *n > 0) { if (*m >= *n) { -/* Compute space needed for SBDSDC */ +/* Compute space needed for DBDSDC */ if (wntqn) { bdspac = *n * 7; } else { - bdspac = *n * 3 * *n + ((*n) << (2)); + bdspac = *n * 3 * *n + (*n << 2); } if (*m >= mnthr) { if (wntqn) { /* Path 1 (M much larger than N, JOBZ='N') */ - wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(& - c__1, "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *n; @@ 
-69118,51 +5323,51 @@ L10: /* Path 2 (M much larger than N, JOBZ='O') */ - wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR", + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(& - c__1, "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *n * 3; wrkbl = max(i__1,i__2); - maxwrk = wrkbl + ((*n) << (1)) * *n; - minwrk = bdspac + ((*n) << (1)) * *n + *n * 3; + maxwrk = wrkbl + (*n << 1) * *n; + minwrk = bdspac + (*n << 1) * *n + *n * 3; } else if (wntqs) { /* Path 3 (M much larger than N, JOBZ='S') */ - wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR", + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(& - c__1, "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); + i__1 = wrkbl, i__2 = *n * 3 
+ (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ @@ -69174,23 +5379,23 @@ L10: /* Path 4 (M much larger than N, JOBZ='A') */ - wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "SORGQR", + i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "DORGQR", " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(& - c__1, "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ @@ -69203,7 +5408,7 @@ L10: /* Path 5 (M at least N, but not much larger) */ - wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m, + wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); if (wntqn) { /* 
Computing MAX */ @@ -69212,11 +5417,11 @@ L10: minwrk = *n * 3 + max(*m,bdspac); } else if (wntqo) { /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ @@ -69228,11 +5433,11 @@ L10: minwrk = *n * 3 + max(i__1,i__2); } else if (wntqs) { /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ @@ -69241,11 +5446,11 @@ L10: minwrk = *n * 3 + max(*m,bdspac); } else if (wntqa) { /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "DORMBR" , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ @@ -69256,24 +5461,24 @@ L10: } } else { -/* Compute space needed for SBDSDC */ +/* Compute space needed for DBDSDC */ if (wntqn) { bdspac = *m * 7; } else { - bdspac = *m * 3 * *m + ((*m) << (2)); + bdspac = *m * 3 * *m + (*m << 2); } if (*n >= mnthr) { if (wntqn) { /* Path 1t (N much larger than M, JOBZ='N') */ - wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + wrkbl = *m 
+ *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(& - c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *m; @@ -69283,51 +5488,51 @@ L10: /* Path 2t (N much larger than M, JOBZ='O') */ - wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "SORGLQ", + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ", " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(& - c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ i__1 = wrkbl, i__2 = bdspac + *m * 3; wrkbl = max(i__1,i__2); - maxwrk = wrkbl + ((*m) << (1)) * *m; - minwrk = bdspac + ((*m) << (1)) * *m + *m * 3; + maxwrk = wrkbl + (*m << 1) * *m; + minwrk = bdspac + (*m << 1) * *m + *m * 3; } else if (wntqs) { /* Path 3t (N much larger than M, JOBZ='S') */ - wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + wrkbl = *m + *m * ilaenv_(&c__1, 
"DGELQF", " ", m, n, & c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "SORGLQ", + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ", " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(& - c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ @@ -69339,23 +5544,23 @@ L10: /* Path 4t (N much larger than M, JOBZ='A') */ - wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "SORGLQ", + i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "DORGLQ", " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(& - c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) - 6, (ftnlen)1); + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m 
* ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ @@ -69368,7 +5573,7 @@ L10: /* Path 5t (N greater than M, but not much larger) */ - wrkbl = *m * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m, + wrkbl = *m * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); if (wntqn) { /* Computing MAX */ @@ -69377,11 +5582,11 @@ L10: minwrk = *m * 3 + max(*n,bdspac); } else if (wntqo) { /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ @@ -69393,11 +5598,11 @@ L10: minwrk = *m * 3 + max(i__1,i__2); } else if (wntqs) { /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ @@ -69406,11 +5611,11 @@ L10: minwrk = *m * 3 + max(*n,bdspac); } else if (wntqa) { /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" , "PRT", n, n, m, &c_n1, 
(ftnlen)6, (ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ @@ -69420,7 +5625,7 @@ L10: } } } - work[1] = (real) maxwrk; + work[1] = (doublereal) maxwrk; } if (*lwork < minwrk && ! lquery) { @@ -69428,7 +5633,7 @@ L10: } if (*info != 0) { i__1 = -(*info); - xerbla_("SGESDD", &i__1); + xerbla_("DGESDD", &i__1); return 0; } else if (lquery) { return 0; @@ -69436,30 +5641,30 @@ L10: /* Quick return if possible */ - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { if (*lwork >= 1) { - work[1] = 1.f; + work[1] = 1.; } return 0; } /* Get machine constants */ - eps = slamch_("P"); - smlnum = sqrt(slamch_("S")) / eps; - bignum = 1.f / smlnum; + eps = PRECISION; + smlnum = sqrt(SAFEMINIMUM) / eps; + bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ - anrm = slange_("M", m, n, &a[a_offset], lda, dum); + anrm = dlange_("M", m, n, &a[a_offset], lda, dum); iscl = 0; - if (anrm > 0.f && anrm < smlnum) { + if (anrm > 0. && anrm < smlnum) { iscl = 1; - slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & ierr); } else if (anrm > bignum) { iscl = 1; - slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & ierr); } @@ -69489,14 +5694,14 @@ L10: */ i__1 = *lwork - nwork + 1; - sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__1, &ierr); /* Zero out below R */ i__1 = *n - 1; i__2 = *n - 1; - slaset_("L", &i__1, &i__2, &c_b320, &c_b320, &a[a_dim1 + 2], + dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &a[a_dim1 + 2], lda); ie = 1; itauq = ie + *n; @@ -69509,7 +5714,7 @@ L10: */ i__1 = *lwork - nwork + 1; - sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__1, &ierr); nwork = ie + *n; @@ 
-69518,7 +5723,7 @@ L10: (Workspace: need N+BDSPAC) */ - sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, &work[nwork], &iwork[1], info); } else if (wntqo) { @@ -69547,15 +5752,15 @@ L10: */ i__1 = *lwork - nwork + 1; - sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__1, &ierr); /* Copy R to WORK(IR), zeroing out below it */ - slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); i__1 = *n - 1; i__2 = *n - 1; - slaset_("L", &i__1, &i__2, &c_b320, &c_b320, &work[ir + 1], & + dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &work[ir + 1], & ldwrkr); /* @@ -69564,7 +5769,7 @@ L10: */ i__1 = *lwork - nwork + 1; - sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); ie = itau; itauq = ie + *n; @@ -69577,7 +5782,7 @@ L10: */ i__1 = *lwork - nwork + 1; - sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__1, &ierr); /* WORK(IU) is N by N */ @@ -69592,7 +5797,7 @@ L10: (Workspace: need N+N*N+BDSPAC) */ - sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); @@ -69603,10 +5808,10 @@ L10: */ i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ itauq], &work[iu], n, &work[nwork], &i__1, &ierr); i__1 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & ierr); @@ -69623,9 +5828,9 @@ L10: /* Computing 
MIN */ i__3 = *m - i__ + 1; chunk = min(i__3,ldwrkr); - sgemm_("N", "N", &chunk, n, n, &c_b1011, &a[i__ + a_dim1], - lda, &work[iu], n, &c_b320, &work[ir], &ldwrkr); - slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + dgemm_("N", "N", &chunk, n, n, &c_b15, &a[i__ + a_dim1], + lda, &work[iu], n, &c_b29, &work[ir], &ldwrkr); + dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + a_dim1], lda); /* L10: */ } @@ -69652,15 +5857,15 @@ L10: */ i__2 = *lwork - nwork + 1; - sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__2, &ierr); /* Copy R to WORK(IR), zeroing out below it */ - slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); i__2 = *n - 1; i__1 = *n - 1; - slaset_("L", &i__2, &i__1, &c_b320, &c_b320, &work[ir + 1], & + dlaset_("L", &i__2, &i__1, &c_b29, &c_b29, &work[ir + 1], & ldwrkr); /* @@ -69669,7 +5874,7 @@ L10: */ i__2 = *lwork - nwork + 1; - sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); ie = itau; itauq = ie + *n; @@ -69682,7 +5887,7 @@ L10: */ i__2 = *lwork - nwork + 1; - sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__2, &ierr); /* @@ -69692,7 +5897,7 @@ L10: (Workspace: need N+BDSPAC) */ - sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); @@ -69703,11 +5908,11 @@ L10: */ i__2 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); i__2 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + 
dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & ierr); @@ -69717,9 +5922,9 @@ L10: (Workspace: need N*N) */ - slacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); - sgemm_("N", "N", m, n, n, &c_b1011, &a[a_offset], lda, &work[ - ir], &ldwrkr, &c_b320, &u[u_offset], ldu); + dlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + dgemm_("N", "N", m, n, n, &c_b15, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b29, &u[u_offset], ldu); } else if (wntqa) { @@ -69743,23 +5948,23 @@ L10: */ i__2 = *lwork - nwork + 1; - sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__2, &ierr); - slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); /* Generate Q in U (Workspace: need N*N+2*N, prefer N*N+N+N*NB) */ i__2 = *lwork - nwork + 1; - sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], &i__2, &ierr); /* Produce R in A, zeroing out other entries */ i__2 = *n - 1; i__1 = *n - 1; - slaset_("L", &i__2, &i__1, &c_b320, &c_b320, &a[a_dim1 + 2], + dlaset_("L", &i__2, &i__1, &c_b29, &c_b29, &a[a_dim1 + 2], lda); ie = itau; itauq = ie + *n; @@ -69772,7 +5977,7 @@ L10: */ i__2 = *lwork - nwork + 1; - sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__2, &ierr); /* @@ -69782,7 +5987,7 @@ L10: (Workspace: need N+N*N+BDSPAC) */ - sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); @@ -69793,11 +5998,11 @@ L10: */ i__2 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + dormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ itauq], &work[iu], 
&ldwrku, &work[nwork], &i__2, & ierr); i__2 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & ierr); @@ -69807,12 +6012,12 @@ L10: (Workspace: need N*N) */ - sgemm_("N", "N", m, n, n, &c_b1011, &u[u_offset], ldu, &work[ - iu], &ldwrku, &c_b320, &a[a_offset], lda); + dgemm_("N", "N", m, n, n, &c_b15, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b29, &a[a_offset], lda); /* Copy left singular vectors of A from A to U */ - slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); } @@ -69836,7 +6041,7 @@ L10: */ i__2 = *lwork - nwork + 1; - sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[nwork], &i__2, &ierr); if (wntqn) { @@ -69845,7 +6050,7 @@ L10: (Workspace: need N+BDSPAC) */ - sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, &work[nwork], &iwork[1], info); } else if (wntqo) { iu = nwork; @@ -69855,7 +6060,7 @@ L10: ldwrku = *m; nwork = iu + ldwrku * *n; - slaset_("F", m, n, &c_b320, &c_b320, &work[iu], &ldwrku); + dlaset_("F", m, n, &c_b29, &c_b29, &work[iu], &ldwrku); } else { /* WORK( IU ) is N by N */ @@ -69877,7 +6082,7 @@ L10: (Workspace: need N+N*N+BDSPAC) */ - sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ 1], info); @@ -69887,7 +6092,7 @@ L10: */ i__2 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & ierr); @@ -69899,13 +6104,13 @@ L10: */ i__2 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", 
m, n, n, &a[a_offset], lda, &work[ + dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & ierr); /* Copy left singular vectors of A from WORK(IU) to A */ - slacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + dlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); } else { /* @@ -69914,7 +6119,7 @@ L10: */ i__2 = *lwork - nwork + 1; - sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & work[nwork], &i__2, &ierr); /* @@ -69931,10 +6136,10 @@ L10: /* Computing MIN */ i__3 = *m - i__ + 1; chunk = min(i__3,ldwrkr); - sgemm_("N", "N", &chunk, n, n, &c_b1011, &a[i__ + - a_dim1], lda, &work[iu], &ldwrku, &c_b320, & + dgemm_("N", "N", &chunk, n, n, &c_b15, &a[i__ + + a_dim1], lda, &work[iu], &ldwrku, &c_b29, & work[ir], &ldwrkr); - slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + a_dim1], lda); /* L20: */ } @@ -69949,8 +6154,8 @@ L10: (Workspace: need N+BDSPAC) */ - slaset_("F", m, n, &c_b320, &c_b320, &u[u_offset], ldu); - sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + dlaset_("F", m, n, &c_b29, &c_b29, &u[u_offset], ldu); + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); @@ -69961,10 +6166,10 @@ L10: */ i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); i__1 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & ierr); } else if (wntqa) { @@ -69976,8 +6181,8 @@ L10: (Workspace: need N+BDSPAC) */ - slaset_("F", m, m, &c_b320, &c_b320, &u[u_offset], ldu); - sbdsdc_("U", "I", n, &s[1], &work[ie], 
&u[u_offset], ldu, &vt[ + dlaset_("F", m, m, &c_b29, &c_b29, &u[u_offset], ldu); + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); @@ -69985,8 +6190,8 @@ L10: i__1 = *m - *n; i__2 = *m - *n; - slaset_("F", &i__1, &i__2, &c_b320, &c_b1011, &u[*n + 1 + (*n - + 1) * u_dim1], ldu); + dlaset_("F", &i__1, &i__2, &c_b29, &c_b15, &u[*n + 1 + (*n + + 1) * u_dim1], ldu); /* Overwrite U by left singular vectors of A and VT @@ -69995,10 +6200,10 @@ L10: */ i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); i__1 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & ierr); } @@ -70031,15 +6236,15 @@ L10: */ i__1 = *lwork - nwork + 1; - sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__1, &ierr); /* Zero out above L */ i__1 = *m - 1; i__2 = *m - 1; - slaset_("U", &i__1, &i__2, &c_b320, &c_b320, &a[((a_dim1) << ( - 1)) + 1], lda); + dlaset_("U", &i__1, &i__2, &c_b29, &c_b29, &a[(a_dim1 << 1) + + 1], lda); ie = 1; itauq = ie + *m; itaup = itauq + *m; @@ -70051,7 +6256,7 @@ L10: */ i__1 = *lwork - nwork + 1; - sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__1, &ierr); nwork = ie + *m; @@ -70060,7 +6265,7 @@ L10: (Workspace: need M+BDSPAC) */ - sbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, &work[nwork], &iwork[1], info); } else if (wntqo) { @@ -70095,16 +6300,16 @@ L10: */ i__1 = *lwork - nwork + 1; - sgelqf_(m, n, &a[a_offset], lda, 
&work[itau], &work[nwork], & + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__1, &ierr); /* Copy L to WORK(IL), zeroing about above it */ - slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); i__1 = *m - 1; i__2 = *m - 1; - slaset_("U", &i__1, &i__2, &c_b320, &c_b320, &work[il + - ldwrkl], &ldwrkl); + dlaset_("U", &i__1, &i__2, &c_b29, &c_b29, &work[il + ldwrkl], + &ldwrkl); /* Generate Q in A @@ -70112,7 +6317,7 @@ L10: */ i__1 = *lwork - nwork + 1; - sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, &ierr); ie = itau; itauq = ie + *m; @@ -70125,7 +6330,7 @@ L10: */ i__1 = *lwork - nwork + 1; - sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__1, &ierr); /* @@ -70135,7 +6340,7 @@ L10: (Workspace: need M+M*M+BDSPAC) */ - sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & work[ivt], m, dum, idum, &work[nwork], &iwork[1], info); @@ -70146,10 +6351,10 @@ L10: */ i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); i__1 = *lwork - nwork + 1; - sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); /* @@ -70165,10 +6370,10 @@ L10: /* Computing MIN */ i__3 = *n - i__ + 1; blk = min(i__3,chunk); - sgemm_("N", "N", m, &blk, m, &c_b1011, &work[ivt], m, &a[ - i__ * a_dim1 + 1], lda, &c_b320, &work[il], & + dgemm_("N", "N", m, &blk, m, &c_b15, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b29, &work[il], & ldwrkl); - slacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ 
* a_dim1 + dlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + 1], lda); /* L30: */ } @@ -70195,16 +6400,16 @@ L10: */ i__2 = *lwork - nwork + 1; - sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__2, &ierr); /* Copy L to WORK(IL), zeroing out above it */ - slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); i__2 = *m - 1; i__1 = *m - 1; - slaset_("U", &i__2, &i__1, &c_b320, &c_b320, &work[il + - ldwrkl], &ldwrkl); + dlaset_("U", &i__2, &i__1, &c_b29, &c_b29, &work[il + ldwrkl], + &ldwrkl); /* Generate Q in A @@ -70212,7 +6417,7 @@ L10: */ i__2 = *lwork - nwork + 1; - sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], &i__2, &ierr); ie = itau; itauq = ie + *m; @@ -70225,7 +6430,7 @@ L10: */ i__2 = *lwork - nwork + 1; - sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__2, &ierr); /* @@ -70235,7 +6440,7 @@ L10: (Workspace: need M+BDSPAC) */ - sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); @@ -70246,10 +6451,10 @@ L10: */ i__2 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); i__2 = *lwork - nwork + 1; - sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & ierr); @@ -70259,9 +6464,9 @@ L10: (Workspace: need M*M) */ - slacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); - sgemm_("N", "N", m, n, m, &c_b1011, &work[il], 
&ldwrkl, &a[ - a_offset], lda, &c_b320, &vt[vt_offset], ldvt); + dlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + dgemm_("N", "N", m, n, m, &c_b15, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b29, &vt[vt_offset], ldvt); } else if (wntqa) { @@ -70285,9 +6490,9 @@ L10: */ i__2 = *lwork - nwork + 1; - sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & i__2, &ierr); - slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); /* Generate Q in VT @@ -70295,15 +6500,15 @@ L10: */ i__2 = *lwork - nwork + 1; - sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ nwork], &i__2, &ierr); /* Produce L in A, zeroing out other entries */ i__2 = *m - 1; i__1 = *m - 1; - slaset_("U", &i__2, &i__1, &c_b320, &c_b320, &a[((a_dim1) << ( - 1)) + 1], lda); + dlaset_("U", &i__2, &i__1, &c_b29, &c_b29, &a[(a_dim1 << 1) + + 1], lda); ie = itau; itauq = ie + *m; itaup = itauq + *m; @@ -70315,7 +6520,7 @@ L10: */ i__2 = *lwork - nwork + 1; - sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ itauq], &work[itaup], &work[nwork], &i__2, &ierr); /* @@ -70325,7 +6530,7 @@ L10: (Workspace: need M+M*M+BDSPAC) */ - sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] , info); @@ -70336,10 +6541,10 @@ L10: */ i__2 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + dormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); i__2 = *lwork - nwork + 1; - sormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ + dormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ itaup], &work[ivt], &ldwkvt, &work[nwork], 
&i__2, & ierr); @@ -70349,12 +6554,12 @@ L10: (Workspace: need M*M) */ - sgemm_("N", "N", m, n, m, &c_b1011, &work[ivt], &ldwkvt, &vt[ - vt_offset], ldvt, &c_b320, &a[a_offset], lda); + dgemm_("N", "N", m, n, m, &c_b15, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b29, &a[a_offset], lda); /* Copy right singular vectors of A from A to VT */ - slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); } @@ -70378,7 +6583,7 @@ L10: */ i__2 = *lwork - nwork + 1; - sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & work[itaup], &work[nwork], &i__2, &ierr); if (wntqn) { @@ -70387,7 +6592,7 @@ L10: (Workspace: need M+BDSPAC) */ - sbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, dum, idum, &work[nwork], &iwork[1], info); } else if (wntqo) { ldwkvt = *m; @@ -70396,7 +6601,7 @@ L10: /* WORK( IVT ) is M by N */ - slaset_("F", m, n, &c_b320, &c_b320, &work[ivt], &ldwkvt); + dlaset_("F", m, n, &c_b29, &c_b29, &work[ivt], &ldwkvt); nwork = ivt + ldwkvt * *n; } else { @@ -70417,7 +6622,7 @@ L10: (Workspace: need M*M+BDSPAC) */ - sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] , info); @@ -70427,7 +6632,7 @@ L10: */ i__2 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); if (*lwork >= *m * *n + *m * 3 + bdspac) { @@ -70438,13 +6643,13 @@ L10: */ i__2 = *lwork - nwork + 1; - sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, &ierr); /* Copy right singular vectors of A 
from WORK(IVT) to A */ - slacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + dlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); } else { /* @@ -70453,7 +6658,7 @@ L10: */ i__2 = *lwork - nwork + 1; - sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & work[nwork], &i__2, &ierr); /* @@ -70470,10 +6675,10 @@ L10: /* Computing MIN */ i__3 = *n - i__ + 1; blk = min(i__3,chunk); - sgemm_("N", "N", m, &blk, m, &c_b1011, &work[ivt], & - ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b320, & + dgemm_("N", "N", m, &blk, m, &c_b15, &work[ivt], & + ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b29, & work[il], m); - slacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + + dlacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + 1], lda); /* L40: */ } @@ -70487,8 +6692,8 @@ L10: (Workspace: need M+BDSPAC) */ - slaset_("F", m, n, &c_b320, &c_b320, &vt[vt_offset], ldvt); - sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + dlaset_("F", m, n, &c_b29, &c_b29, &vt[vt_offset], ldvt); + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], info); @@ -70499,10 +6704,10 @@ L10: */ i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); i__1 = *lwork - nwork + 1; - sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & ierr); } else if (wntqa) { @@ -70514,8 +6719,8 @@ L10: (Workspace: need M+BDSPAC) */ - slaset_("F", n, n, &c_b320, &c_b320, &vt[vt_offset], ldvt); - sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + dlaset_("F", n, n, &c_b29, &c_b29, &vt[vt_offset], ldvt); + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ vt_offset], ldvt, dum, idum, &work[nwork], 
&iwork[1], info); @@ -70523,8 +6728,8 @@ L10: i__1 = *n - *m; i__2 = *n - *m; - slaset_("F", &i__1, &i__2, &c_b320, &c_b1011, &vt[*m + 1 + (* - m + 1) * vt_dim1], ldvt); + dlaset_("F", &i__1, &i__2, &c_b29, &c_b15, &vt[*m + 1 + (*m + + 1) * vt_dim1], ldvt); /* Overwrite U by left singular vectors of A and VT @@ -70533,10 +6738,10 @@ L10: */ i__1 = *lwork - nwork + 1; - sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); i__1 = *lwork - nwork + 1; - sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & ierr); } @@ -70549,36 +6754,35 @@ L10: if (iscl == 1) { if (anrm > bignum) { - slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & minmn, &ierr); } if (anrm < smlnum) { - slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & minmn, &ierr); } } /* Return optimal workspace in WORK(1) */ - work[1] = (real) maxwrk; + work[1] = (doublereal) maxwrk; return 0; -/* End of SGESDD */ +/* End of DGESDD */ -} /* sgesdd_ */ +} /* dgesdd_ */ -/* Subroutine */ int sgesv_(integer *n, integer *nrhs, real *a, integer *lda, - integer *ipiv, real *b, integer *ldb, integer *info) +/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer + *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ - extern /* Subroutine */ int xerbla_(char *, integer *), sgetrf_( - integer *, integer *, real *, integer *, integer *, integer *), - sgetrs_(char *, integer *, integer *, real *, integer *, integer * - , real *, integer *, integer *); + extern /* Subroutine */ int 
dgetrf_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *), dgetrs_(char *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *); /* @@ -70591,7 +6795,7 @@ L10: Purpose ======= - SGESV computes the solution to a real system of linear equations + DGESV computes the solution to a real system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices. @@ -70613,7 +6817,7 @@ L10: The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the N-by-N coefficient matrix A. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. @@ -70625,7 +6829,7 @@ L10: The pivot indices that define the permutation matrix P; row i of the matrix was interchanged with row IPIV(i). - B (input/output) REAL array, dimension (LDB,NRHS) + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, if INFO = 0, the N-by-NRHS solution matrix X. @@ -70667,40 +6871,42 @@ L10: } if (*info != 0) { i__1 = -(*info); - xerbla_("SGESV ", &i__1); + xerbla_("DGESV ", &i__1); return 0; } /* Compute the LU factorization of A. */ - sgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); if (*info == 0) { /* Solve the system A*X = B, overwriting B with X. 
*/ - sgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ b_offset], ldb, info); } return 0; -/* End of SGESV */ +/* End of DGESV */ -} /* sgesv_ */ +} /* dgesv_ */ -/* Subroutine */ int sgetf2_(integer *m, integer *n, real *a, integer *lda, - integer *ipiv, integer *info) +/* Subroutine */ int dgetf2_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1; + doublereal d__1; /* Local variables */ static integer j, jp; - extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, - integer *, real *, integer *, real *, integer *), sscal_(integer * - , real *, real *, integer *), sswap_(integer *, real *, integer *, - real *, integer *), xerbla_(char *, integer *); - extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), dscal_(integer *, doublereal *, doublereal *, integer + *), dswap_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); /* @@ -70713,7 +6919,7 @@ L10: Purpose ======= - SGETF2 computes an LU factorization of a general m-by-n matrix A + DGETF2 computes an LU factorization of a general m-by-n matrix A using partial pivoting with row interchanges. The factorization has the form @@ -70733,7 +6939,7 @@ L10: N (input) INTEGER The number of columns of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the m by n matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. 
@@ -70776,13 +6982,13 @@ L10: } if (*info != 0) { i__1 = -(*info); - xerbla_("SGETF2", &i__1); + xerbla_("DGETF2", &i__1); return 0; } /* Quick return if possible */ - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { return 0; } @@ -70792,22 +6998,22 @@ L10: /* Find pivot and test for singularity. */ i__2 = *m - j + 1; - jp = j - 1 + isamax_(&i__2, &a[j + j * a_dim1], &c__1); + jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1); ipiv[j] = jp; - if (a[jp + j * a_dim1] != 0.f) { + if (a[jp + j * a_dim1] != 0.) { /* Apply the interchange to columns 1:N. */ if (jp != j) { - sswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); } /* Compute elements J+1:M of J-th column. */ if (j < *m) { i__2 = *m - j; - r__1 = 1.f / a[j + j * a_dim1]; - sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + d__1 = 1. / a[j + j * a_dim1]; + dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); } } else if (*info == 0) { @@ -70821,7 +7027,7 @@ L10: i__2 = *m - j; i__3 = *n - j; - sger_(&i__2, &i__3, &c_b1290, &a[j + 1 + j * a_dim1], &c__1, &a[j + dger_(&i__2, &i__3, &c_b151, &a[j + 1 + j * a_dim1], &c__1, &a[j + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda); } @@ -70829,29 +7035,31 @@ L10: } return 0; -/* End of SGETF2 */ +/* End of DGETF2 */ -} /* sgetf2_ */ +} /* dgetf2_ */ -/* Subroutine */ int sgetrf_(integer *m, integer *n, real *a, integer *lda, - integer *ipiv, integer *info) +/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ - static integer i__, j, jb, nb, iinfo; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *), strsm_(char *, char *, char *, - char *, integer *, integer *, real *, real *, integer *, real *, - 
integer *), sgetf2_(integer *, - integer *, real *, integer *, integer *, integer *), xerbla_(char - *, integer *); + static integer i__, j, jb, nb; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer iinfo; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), dgetf2_( + integer *, integer *, doublereal *, integer *, integer *, integer + *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer - *, integer *, integer *, integer *); + extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *); /* @@ -70864,7 +7072,7 @@ L10: Purpose ======= - SGETRF computes an LU factorization of a general M-by-N matrix A + DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. The factorization has the form @@ -70884,7 +7092,7 @@ L10: N (input) INTEGER The number of columns of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. @@ -70927,25 +7135,25 @@ L10: } if (*info != 0) { i__1 = -(*info); - xerbla_("SGETRF", &i__1); + xerbla_("DGETRF", &i__1); return 0; } /* Quick return if possible */ - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { return 0; } /* Determine the block size for this environment. 
*/ - nb = ilaenv_(&c__1, "SGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + nb = ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) 1); - if ((nb <= 1) || (nb >= min(*m,*n))) { + if (nb <= 1 || nb >= min(*m,*n)) { /* Use unblocked code. */ - sgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); } else { /* Use blocked code. */ @@ -70963,7 +7171,7 @@ L10: */ i__3 = *m - j + 1; - sgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + dgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); /* Adjust INFO and the pivot indices. */ @@ -70982,7 +7190,7 @@ L10: i__3 = j - 1; i__4 = j + jb - 1; - slaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); if (j + jb <= *n) { @@ -70990,14 +7198,14 @@ L10: i__3 = *n - j - jb + 1; i__4 = j + jb - 1; - slaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & ipiv[1], &c__1); /* Compute block row of U. 
*/ i__3 = *n - j - jb + 1; - strsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & - c_b1011, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b15, &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda); if (j + jb <= *m) { @@ -71005,10 +7213,10 @@ L10: i__3 = *m - j - jb + 1; i__4 = *n - j - jb + 1; - sgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, - &c_b1290, &a[j + jb + j * a_dim1], lda, &a[j + (j - + jb) * a_dim1], lda, &c_b1011, &a[j + jb + (j + - jb) * a_dim1], lda); + dgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &c_b151, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b15, &a[j + jb + (j + jb) + * a_dim1], lda); } } /* L20: */ @@ -71016,24 +7224,25 @@ L10: } return 0; -/* End of SGETRF */ +/* End of DGETRF */ -} /* sgetrf_ */ +} /* dgetrf_ */ -/* Subroutine */ int sgetrs_(char *trans, integer *n, integer *nrhs, real *a, - integer *lda, integer *ipiv, real *b, integer *ldb, integer *info) +/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs, + doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * + ldb, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern logical lsame_(char *, char *); - extern /* Subroutine */ int strsm_(char *, char *, char *, char *, - integer *, integer *, real *, real *, integer *, real *, integer * - ), xerbla_(char *, integer *); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), xerbla_( + char *, integer *), dlaswp_(integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *); static logical notran; - extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer - *, integer *, integer *, integer *); /* @@ -71046,10 +7255,10 @@ L10: Purpose ======= - SGETRS solves a system of 
linear equations + DGETRS solves a system of linear equations A * X = B or A' * X = B with a general N-by-N matrix A using the LU factorization computed - by SGETRF. + by DGETRF. Arguments ========= @@ -71067,18 +7276,18 @@ L10: The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. - A (input) REAL array, dimension (LDA,N) + A (input) DOUBLE PRECISION array, dimension (LDA,N) The factors L and U from the factorization A = P*L*U - as computed by SGETRF. + as computed by DGETRF. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). IPIV (input) INTEGER array, dimension (N) - The pivot indices from SGETRF; for 1<=i<=N, row i of the + The pivot indices from DGETRF; for 1<=i<=N, row i of the matrix was interchanged with row IPIV(i). - B (input/output) REAL array, dimension (LDB,NRHS) + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. @@ -71121,13 +7330,13 @@ L10: } if (*info != 0) { i__1 = -(*info); - xerbla_("SGETRS", &i__1); + xerbla_("DGETRS", &i__1); return 0; } /* Quick return if possible */ - if ((*n == 0) || (*nrhs == 0)) { + if (*n == 0 || *nrhs == 0) { return 0; } @@ -71139,17 +7348,17 @@ L10: Apply row interchanges to the right hand sides. */ - slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); /* Solve L*X = B, overwriting B with X. */ - strsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b1011, &a[ + dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b15, &a[ a_offset], lda, &b[b_offset], ldb); /* Solve U*X = B, overwriting B with X. 
*/ - strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b1011, - &a[a_offset], lda, &b[b_offset], ldb); + dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b15, & + a[a_offset], lda, &b[b_offset], ldb); } else { /* @@ -71158,34 +7367,35 @@ L10: Solve U'*X = B, overwriting B with X. */ - strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b1011, & - a[a_offset], lda, &b[b_offset], ldb); + dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ + a_offset], lda, &b[b_offset], ldb); /* Solve L'*X = B, overwriting B with X. */ - strsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b1011, &a[ + dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b15, &a[ a_offset], lda, &b[b_offset], ldb); /* Apply row interchanges to the solution vectors. */ - slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); } return 0; -/* End of SGETRS */ +/* End of DGETRS */ -} /* sgetrs_ */ +} /* dgetrs_ */ -/* Subroutine */ int shseqr_(char *job, char *compz, integer *n, integer *ilo, - integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__, - integer *ldz, real *work, integer *lwork, integer *info) +/* Subroutine */ int dhseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, + doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4, i__5; - real r__1, r__2; + doublereal d__1, d__2; char ch__1[2]; /* Builtin functions */ @@ -71193,43 +7403,48 @@ L10: /* Local variables */ static integer i__, j, k, l; - static real s[225] /* was [15][15] */, v[16]; + static doublereal s[225] /* was [15][15] */, v[16]; static integer i1, i2, ii, nh, nr, ns, nv; - static real vv[16]; + static doublereal vv[16]; static integer itn; - static real tau; 
+ static doublereal tau; static integer its; - static real ulp, tst1; + static doublereal ulp, tst1; static integer maxb; - static real absw; + static doublereal absw; static integer ierr; - static real unfl, temp, ovfl; + static doublereal unfl, temp, ovfl; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); static integer itemp; - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *); - static logical initz, wantt; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static logical initz, wantt, wantz; + extern doublereal dlapy2_(doublereal *, doublereal *); + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + + extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *); + extern integer idamax_(integer *, doublereal *, integer *); + extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, + doublereal *); + extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *), dlaset_(char *, + integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); - static logical wantz; - extern doublereal slapy2_(real *, real *); - extern /* Subroutine */ int slabad_(real *, real *); - extern doublereal slamch_(char *); - extern /* 
Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, - real *); - extern integer isamax_(integer *, real *, integer *); - extern doublereal slanhs_(char *, integer *, real *, integer *, real *); - extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, - integer *, integer *, real *, integer *, real *, real *, integer * - , integer *, real *, integer *, integer *), slacpy_(char *, - integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, - real *, integer *), slarfx_(char *, integer *, integer *, - real *, real *, real *, integer *, real *); - static real smlnum; + extern /* Subroutine */ int xerbla_(char *, integer *), dlarfx_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *); + static doublereal smlnum; static logical lquery; @@ -71243,7 +7458,7 @@ L10: Purpose ======= - SHSEQR computes the eigenvalues of a real upper Hessenberg matrix H + DHSEQR computes the eigenvalues of a real upper Hessenberg matrix H and, optionally, the matrices T and Z from the Schur decomposition H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur form), and Z is the orthogonal matrix of Schur vectors. @@ -71274,13 +7489,13 @@ L10: IHI (input) INTEGER It is assumed that H is already upper triangular in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to SGEBAL, and then passed to SGEHRD - when the matrix output by SGEBAL is reduced to Hessenberg + set by a previous call to DGEBAL, and then passed to SGEHRD + when the matrix output by DGEBAL is reduced to Hessenberg form. Otherwise ILO and IHI should be set to 1 and N respectively. 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. 
- H (input/output) REAL array, dimension (LDH,N) + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) On entry, the upper Hessenberg matrix H. On exit, if JOB = 'S', H contains the upper quasi-triangular matrix T from the Schur decomposition (the Schur form); @@ -71292,8 +7507,8 @@ L10: LDH (input) INTEGER The leading dimension of the array H. LDH >= max(1,N). - WR (output) REAL array, dimension (N) - WI (output) REAL array, dimension (N) + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) The real and imaginary parts, respectively, of the computed eigenvalues. If two eigenvalues are computed as a complex conjugate pair, they are stored in consecutive elements of @@ -71304,21 +7519,21 @@ L10: diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). - Z (input/output) REAL array, dimension (LDZ,N) + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) If COMPZ = 'N': Z is not referenced. If COMPZ = 'I': on entry, Z need not be set, and on exit, Z contains the orthogonal matrix Z of the Schur vectors of H. If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, which is assumed to be equal to the unit matrix except for the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. - Normally Q is the orthogonal matrix generated by SORGHR after - the call to SGEHRD which formed the Hessenberg matrix H. + Normally Q is the orthogonal matrix generated by DORGHR after + the call to DGEHRD which formed the Hessenberg matrix H. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -71332,7 +7547,7 @@ L10: INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, SHSEQR failed to compute all of the + > 0: if INFO = i, DHSEQR failed to compute all of the eigenvalues in a total of 30*(IHI-ILO+1) iterations; elements 1:ilo-1 and i+1:n of WR and WI contain those eigenvalues which have been successfully computed. @@ -71357,10 +7572,10 @@ L10: /* Function Body */ wantt = lsame_(job, "S"); initz = lsame_(compz, "I"); - wantz = (initz) || (lsame_(compz, "V")); + wantz = initz || lsame_(compz, "V"); *info = 0; - work[1] = (real) max(1,*n); + work[1] = (doublereal) max(1,*n); lquery = *lwork == -1; if (! lsame_(job, "E") && ! wantt) { *info = -1; @@ -71368,20 +7583,20 @@ L10: *info = -2; } else if (*n < 0) { *info = -3; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { + } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -4; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -5; } else if (*ldh < max(1,*n)) { *info = -7; - } else if ((*ldz < 1) || (wantz && *ldz < max(1,*n))) { + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { *info = -11; } else if (*lwork < max(1,*n) && ! lquery) { *info = -13; } if (*info != 0) { i__1 = -(*info); - xerbla_("SHSEQR", &i__1); + xerbla_("DHSEQR", &i__1); return 0; } else if (lquery) { return 0; @@ -71390,21 +7605,21 @@ L10: /* Initialize Z, if necessary */ if (initz) { - slaset_("Full", n, n, &c_b320, &c_b1011, &z__[z_offset], ldz); + dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); } -/* Store the eigenvalues isolated by SGEBAL. */ +/* Store the eigenvalues isolated by DGEBAL. 
*/ i__1 = *ilo - 1; for (i__ = 1; i__ <= i__1; ++i__) { wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.f; + wi[i__] = 0.; /* L10: */ } i__1 = *n; for (i__ = *ihi + 1; i__ <= i__1; ++i__) { wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.f; + wi[i__] = 0.; /* L20: */ } @@ -71415,7 +7630,7 @@ L10: } if (*ilo == *ihi) { wr[*ilo] = h__[*ilo + *ilo * h_dim1]; - wi[*ilo] = 0.f; + wi[*ilo] = 0.; return 0; } @@ -71428,7 +7643,7 @@ L10: for (j = *ilo; j <= i__1; ++j) { i__2 = *n; for (i__ = j + 2; i__ <= i__2; ++i__) { - h__[i__ + j * h_dim1] = 0.f; + h__[i__ + j * h_dim1] = 0.; /* L30: */ } /* L40: */ @@ -71443,19 +7658,19 @@ L10: i__3[0] = 1, a__1[0] = job; i__3[1] = 1, a__1[1] = compz; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - ns = ilaenv_(&c__4, "SHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ns = ilaenv_(&c__4, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( ftnlen)2); /* Writing concatenation */ i__3[0] = 1, a__1[0] = job; i__3[1] = 1, a__1[1] = compz; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - maxb = ilaenv_(&c__8, "SHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + maxb = ilaenv_(&c__8, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( ftnlen)2); - if (((ns <= 2) || (ns > nh)) || (maxb >= nh)) { + if (ns <= 2 || ns > nh || maxb >= nh) { /* Use the standard double-shift algorithm */ - slahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ + dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ 1], ilo, ihi, &z__[z_offset], ldz, info); return 0; } @@ -71471,10 +7686,10 @@ L10: If norm(H) <= sqrt(OVFL), overflow should not occur. */ - unfl = slamch_("Safe minimum"); - ovfl = 1.f / unfl; - slabad_(&unfl, &ovfl); - ulp = slamch_("Precision"); + unfl = SAFEMINIMUM; + ovfl = 1. 
/ unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; smlnum = unfl * (nh / ulp); /* @@ -71520,16 +7735,16 @@ L50: i__2 = l + 1; for (k = i__; k >= i__2; --k) { - tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 - = h__[k + k * h_dim1], dabs(r__2)); - if (tst1 == 0.f) { + tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[k + k * h_dim1], abs(d__2)); + if (tst1 == 0.) { i__4 = i__ - l + 1; - tst1 = slanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1] + tst1 = dlanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1] ); } /* Computing MAX */ - r__2 = ulp * tst1; - if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2, + d__2 = ulp * tst1; + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, smlnum)) { goto L70; } @@ -71541,7 +7756,7 @@ L70: /* H(L,L-1) is negligible. */ - h__[l + (l - 1) * h_dim1] = 0.f; + h__[l + (l - 1) * h_dim1] = 0.; } /* Exit from loop if a submatrix of order <= MAXB has split off. */ @@ -71561,37 +7776,37 @@ L70: i2 = i__; } - if ((its == 20) || (its == 30)) { + if (its == 20 || its == 30) { /* Exceptional shifts. */ i__2 = i__; for (ii = i__ - ns + 1; ii <= i__2; ++ii) { - wr[ii] = ((r__1 = h__[ii + (ii - 1) * h_dim1], dabs(r__1)) + ( - r__2 = h__[ii + ii * h_dim1], dabs(r__2))) * 1.5f; - wi[ii] = 0.f; + wr[ii] = ((d__1 = h__[ii + (ii - 1) * h_dim1], abs(d__1)) + ( + d__2 = h__[ii + ii * h_dim1], abs(d__2))) * 1.5; + wi[ii] = 0.; /* L80: */ } } else { /* Use eigenvalues of trailing submatrix of order NS as shifts. 
*/ - slacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * + dlacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * h_dim1], ldh, s, &c__15); - slahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ - + dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ - ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr); if (ierr > 0) { /* - If SLAHQR failed to compute all NS eigenvalues, use the + If DLAHQR failed to compute all NS eigenvalues, use the unconverged diagonal elements as the remaining shifts. */ i__2 = ierr; for (ii = 1; ii <= i__2; ++ii) { wr[i__ - ns + ii] = s[ii + ii * 15 - 16]; - wi[i__ - ns + ii] = 0.f; + wi[i__ - ns + ii] = 0.; /* L90: */ } } @@ -71604,50 +7819,50 @@ L70: stored in the local array V. */ - v[0] = 1.f; + v[0] = 1.; i__2 = ns + 1; for (ii = 2; ii <= i__2; ++ii) { - v[ii - 1] = 0.f; + v[ii - 1] = 0.; /* L100: */ } nv = 1; i__2 = i__; for (j = i__ - ns + 1; j <= i__2; ++j) { - if (wi[j] >= 0.f) { - if (wi[j] == 0.f) { + if (wi[j] >= 0.) { + if (wi[j] == 0.) { /* real shift */ i__4 = nv + 1; - scopy_(&i__4, v, &c__1, vv, &c__1); + dcopy_(&i__4, v, &c__1, vv, &c__1); i__4 = nv + 1; - r__1 = -wr[j]; - sgemv_("No transpose", &i__4, &nv, &c_b1011, &h__[l + l * - h_dim1], ldh, vv, &c__1, &r__1, v, &c__1); + d__1 = -wr[j]; + dgemv_("No transpose", &i__4, &nv, &c_b15, &h__[l + l * + h_dim1], ldh, vv, &c__1, &d__1, v, &c__1); ++nv; - } else if (wi[j] > 0.f) { + } else if (wi[j] > 0.) 
{ /* complex conjugate pair of shifts */ i__4 = nv + 1; - scopy_(&i__4, v, &c__1, vv, &c__1); + dcopy_(&i__4, v, &c__1, vv, &c__1); i__4 = nv + 1; - r__1 = wr[j] * -2.f; - sgemv_("No transpose", &i__4, &nv, &c_b1011, &h__[l + l * - h_dim1], ldh, v, &c__1, &r__1, vv, &c__1); + d__1 = wr[j] * -2.; + dgemv_("No transpose", &i__4, &nv, &c_b15, &h__[l + l * + h_dim1], ldh, v, &c__1, &d__1, vv, &c__1); i__4 = nv + 1; - itemp = isamax_(&i__4, vv, &c__1); + itemp = idamax_(&i__4, vv, &c__1); /* Computing MAX */ - r__2 = (r__1 = vv[itemp - 1], dabs(r__1)); - temp = 1.f / dmax(r__2,smlnum); + d__2 = (d__1 = vv[itemp - 1], abs(d__1)); + temp = 1. / max(d__2,smlnum); i__4 = nv + 1; - sscal_(&i__4, &temp, vv, &c__1); - absw = slapy2_(&wr[j], &wi[j]); + dscal_(&i__4, &temp, vv, &c__1); + absw = dlapy2_(&wr[j], &wi[j]); temp = temp * absw * absw; i__4 = nv + 2; i__5 = nv + 1; - sgemv_("No transpose", &i__4, &i__5, &c_b1011, &h__[l + l - * h_dim1], ldh, vv, &c__1, &temp, v, &c__1); + dgemv_("No transpose", &i__4, &i__5, &c_b15, &h__[l + l * + h_dim1], ldh, vv, &c__1, &temp, v, &c__1); nv += 2; } @@ -71656,19 +7871,19 @@ L70: reset it to the unit vector. */ - itemp = isamax_(&nv, v, &c__1); - temp = (r__1 = v[itemp - 1], dabs(r__1)); - if (temp == 0.f) { - v[0] = 1.f; + itemp = idamax_(&nv, v, &c__1); + temp = (d__1 = v[itemp - 1], abs(d__1)); + if (temp == 0.) { + v[0] = 1.; i__4 = nv; for (ii = 2; ii <= i__4; ++ii) { - v[ii - 1] = 0.f; + v[ii - 1] = 0.; /* L110: */ } } else { - temp = dmax(temp,smlnum); - r__1 = 1.f / temp; - sscal_(&nv, &r__1, v, &c__1); + temp = max(temp,smlnum); + d__1 = 1. 
/ temp; + dscal_(&nv, &d__1, v, &c__1); } } /* L120: */ @@ -71694,18 +7909,18 @@ L70: i__4 = ns + 1, i__5 = i__ - k + 1; nr = min(i__4,i__5); if (k > l) { - scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); } - slarfg_(&nr, v, &v[1], &c__1, &tau); + dlarfg_(&nr, v, &v[1], &c__1, &tau); if (k > l) { h__[k + (k - 1) * h_dim1] = v[0]; i__4 = i__; for (ii = k + 1; ii <= i__4; ++ii) { - h__[ii + (k - 1) * h_dim1] = 0.f; + h__[ii + (k - 1) * h_dim1] = 0.; /* L130: */ } } - v[0] = 1.f; + v[0] = 1.; /* Apply G from the left to transform the rows of the matrix in @@ -71713,7 +7928,7 @@ L70: */ i__4 = i2 - k + 1; - slarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, & + dlarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, & work[1]); /* @@ -71724,14 +7939,14 @@ L70: */ i__5 = k + nr; i__4 = min(i__5,i__) - i1 + 1; - slarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, + dlarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, &work[1]); if (wantz) { /* Accumulate transformations in the matrix Z */ - slarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], + dlarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], ldz, &work[1]); } /* L140: */ @@ -71752,7 +7967,7 @@ L160: off. Use the double-shift QR algorithm to handle it. 
*/ - slahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1], + dlahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz, info); if (*info > 0) { return 0; @@ -71768,17 +7983,17 @@ L160: goto L50; L170: - work[1] = (real) max(1,*n); + work[1] = (doublereal) max(1,*n); return 0; -/* End of SHSEQR */ +/* End of DHSEQR */ -} /* shseqr_ */ +} /* dhseqr_ */ -/* Subroutine */ int slabad_(real *small, real *large) +/* Subroutine */ int dlabad_(doublereal *small, doublereal *large) { /* Builtin functions */ - double r_lg10(real *), sqrt(doublereal); + double d_lg10(doublereal *), sqrt(doublereal); /* @@ -71791,25 +8006,25 @@ L170: Purpose ======= - SLABAD takes as input the values computed by SLAMCH for underflow and + DLABAD takes as input the values computed by DLAMCH for underflow and overflow, and returns the square root of each of these values if the log of LARGE is sufficiently large. This subroutine is intended to identify machines with a large exponent range, such as the Crays, and redefine the underflow and overflow limits to be the square roots of - the values computed by SLAMCH. This subroutine is needed because - SLAMCH does not compensate for poor arithmetic in the upper half of + the values computed by DLAMCH. This subroutine is needed because + DLAMCH does not compensate for poor arithmetic in the upper half of the exponent range, as is found on a Cray. Arguments ========= - SMALL (input/output) REAL - On entry, the underflow threshold as computed by SLAMCH. + SMALL (input/output) DOUBLE PRECISION + On entry, the underflow threshold as computed by DLAMCH. On exit, if LOG10(LARGE) is sufficiently large, the square root of SMALL, otherwise unchanged. - LARGE (input/output) REAL - On entry, the overflow threshold as computed by SLAMCH. + LARGE (input/output) DOUBLE PRECISION + On entry, the overflow threshold as computed by DLAMCH. 
On exit, if LOG10(LARGE) is sufficiently large, the square root of LARGE, otherwise unchanged. @@ -71820,20 +8035,21 @@ L170: SMALL and LARGE to avoid overflow and underflow problems. */ - if (r_lg10(large) > 2e3f) { + if (d_lg10(large) > 2e3) { *small = sqrt(*small); *large = sqrt(*large); } return 0; -/* End of SLABAD */ +/* End of DLABAD */ -} /* slabad_ */ +} /* dlabad_ */ -/* Subroutine */ int slabrd_(integer *m, integer *n, integer *nb, real *a, - integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, - integer *ldx, real *y, integer *ldy) +/* Subroutine */ int dlabrd_(integer *m, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, + doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer + *ldy) { /* System generated locals */ integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, @@ -71841,10 +8057,11 @@ L170: /* Local variables */ static integer i__; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - sgemv_(char *, integer *, integer *, real *, real *, integer *, - real *, integer *, real *, real *, integer *), slarfg_( - integer *, real *, real *, integer *, real *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemv_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *); /* @@ -71857,7 +8074,7 @@ L170: Purpose ======= - SLABRD reduces the first NB rows and columns of a real general + DLABRD reduces the first NB rows and columns of a real general m by n matrix A to upper or lower bidiagonal form by an orthogonal transformation Q' * A * P, and returns the matrices X and Y which are needed to apply the transformation to the unreduced part of A. 
@@ -71865,7 +8082,7 @@ L170: If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower bidiagonal form. - This is an auxiliary routine called by SGEBRD + This is an auxiliary routine called by DGEBRD Arguments ========= @@ -71879,7 +8096,7 @@ L170: NB (input) INTEGER The number of leading rows and columns of A to be reduced. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the m by n general matrix to be reduced. On exit, the first NB rows and columns of the matrix are overwritten; the rest of the array is unchanged. @@ -71900,30 +8117,30 @@ L170: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). - D (output) REAL array, dimension (NB) + D (output) DOUBLE PRECISION array, dimension (NB) The diagonal elements of the first NB rows and columns of the reduced matrix. D(i) = A(i,i). - E (output) REAL array, dimension (NB) + E (output) DOUBLE PRECISION array, dimension (NB) The off-diagonal elements of the first NB rows and columns of the reduced matrix. - TAUQ (output) REAL array dimension (NB) + TAUQ (output) DOUBLE PRECISION array dimension (NB) The scalar factors of the elementary reflectors which represent the orthogonal matrix Q. See Further Details. - TAUP (output) REAL array, dimension (NB) + TAUP (output) DOUBLE PRECISION array, dimension (NB) The scalar factors of the elementary reflectors which represent the orthogonal matrix P. See Further Details. - X (output) REAL array, dimension (LDX,NB) + X (output) DOUBLE PRECISION array, dimension (LDX,NB) The m-by-nb matrix X required to update the unreduced part of A. LDX (input) INTEGER The leading dimension of the array X. LDX >= M. - Y (output) REAL array, dimension (LDY,NB) + Y (output) DOUBLE PRECISION array, dimension (LDY,NB) The n-by-nb matrix Y required to update the unreduced part of A. 
@@ -71995,7 +8212,7 @@ L170: y -= y_offset; /* Function Body */ - if ((*m <= 0) || (*n <= 0)) { + if (*m <= 0 || *n <= 0) { return 0; } @@ -72010,67 +8227,66 @@ L170: i__2 = *m - i__ + 1; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[i__ + a_dim1], - lda, &y[i__ + y_dim1], ldy, &c_b1011, &a[i__ + i__ * - a_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + a_dim1], + lda, &y[i__ + y_dim1], ldy, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); i__2 = *m - i__ + 1; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &x[i__ + x_dim1], - ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b1011, &a[i__ + i__ * - a_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + x_dim1], + ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b15, &a[i__ + i__ * + a_dim1], &c__1); /* Generate reflection Q(i) to annihilate A(i+1:m,i) */ i__2 = *m - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; - slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1], &c__1, &tauq[i__]); d__[i__] = a[i__ + i__ * a_dim1]; if (i__ < *n) { - a[i__ + i__ * a_dim1] = 1.f; + a[i__ + i__ * a_dim1] = 1.; /* Compute Y(i+1:n,i) */ i__2 = *m - i__ + 1; i__3 = *n - i__; - sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[i__ + (i__ + 1) - * a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, & - c_b320, &y[i__ + 1 + i__ * y_dim1], &c__1); + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + (i__ + 1) * + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b29, + &y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__ + 1; i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[i__ + a_dim1], - lda, &a[i__ + i__ * a_dim1], &c__1, &c_b320, &y[i__ * + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + a_dim1], + lda, &a[i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * y_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 
+ 1], &c__1, &c_b1011, & - y[i__ + 1 + i__ * y_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b15, &y[ + i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__ + 1; i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b1011, &x[i__ + x_dim1], - ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b320, &y[i__ * + dgemv_("Transpose", &i__2, &i__3, &c_b15, &x[i__ + x_dim1], + ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * y_dim1 + 1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; - sgemv_("Transpose", &i__2, &i__3, &c_b1290, &a[(i__ + 1) * - a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b1011, &y[i__ + 1 + i__ * y_dim1], &c__1) - ; + dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b15, + &y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *n - i__; - sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); /* Update A(i,i+1:n) */ i__2 = *n - i__; - sgemv_("No transpose", &i__2, &i__, &c_b1290, &y[i__ + 1 + - y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b1011, &a[i__ - + (i__ + 1) * a_dim1], lda); + dgemv_("No transpose", &i__2, &i__, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + + (i__ + 1) * a_dim1], lda); i__2 = i__ - 1; i__3 = *n - i__; - sgemv_("Transpose", &i__2, &i__3, &c_b1290, &a[(i__ + 1) * - a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b1011, &a[ + dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b15, &a[ i__ + (i__ + 1) * a_dim1], lda); /* Generate reflection P(i) to annihilate A(i,i+2:n) */ @@ -72078,38 +8294,38 @@ L170: i__2 = *n - i__; /* Computing MIN */ i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( i__3,*n) * a_dim1], lda, &taup[i__]); e[i__] = a[i__ + (i__ + 1) * a_dim1]; - a[i__ + (i__ + 1) * 
a_dim1] = 1.f; + a[i__ + (i__ + 1) * a_dim1] = 1.; /* Compute X(i+1:m,i) */ i__2 = *m - i__; i__3 = *n - i__; - sgemv_("No transpose", &i__2, &i__3, &c_b1011, &a[i__ + 1 + ( + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + ( i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], - lda, &c_b320, &x[i__ + 1 + i__ * x_dim1], &c__1); + lda, &c_b29, &x[i__ + 1 + i__ * x_dim1], &c__1); i__2 = *n - i__; - sgemv_("Transpose", &i__2, &i__, &c_b1011, &y[i__ + 1 + - y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b320, &x[i__ * x_dim1 + 1], &c__1); + dgemv_("Transpose", &i__2, &i__, &c_b15, &y[i__ + 1 + y_dim1], + ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b29, &x[ + i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; - sgemv_("No transpose", &i__2, &i__, &c_b1290, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b1011, & - x[i__ + 1 + i__ * x_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__, &c_b151, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; - sgemv_("No transpose", &i__2, &i__3, &c_b1011, &a[(i__ + 1) * + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b320, &x[i__ * x_dim1 + 1], &c__1); + c_b29, &x[i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b1011, & - x[i__ + 1 + i__ * x_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); i__2 = *m - i__; - sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); } /* L10: */ } @@ -72124,119 +8340,117 @@ L170: i__2 = *n - i__ + 1; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &y[i__ + y_dim1], - ldy, &a[i__ + a_dim1], lda, 
&c_b1011, &a[i__ + i__ * - a_dim1], lda); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + y_dim1], + ldy, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1] + , lda); i__2 = i__ - 1; i__3 = *n - i__ + 1; - sgemv_("Transpose", &i__2, &i__3, &c_b1290, &a[i__ * a_dim1 + 1], - lda, &x[i__ + x_dim1], ldx, &c_b1011, &a[i__ + i__ * - a_dim1], lda); + dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[i__ * a_dim1 + 1], + lda, &x[i__ + x_dim1], ldx, &c_b15, &a[i__ + i__ * a_dim1] + , lda); /* Generate reflection P(i) to annihilate A(i,i+1:n) */ i__2 = *n - i__ + 1; /* Computing MIN */ i__3 = i__ + 1; - slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1], lda, &taup[i__]); d__[i__] = a[i__ + i__ * a_dim1]; if (i__ < *m) { - a[i__ + i__ * a_dim1] = 1.f; + a[i__ + i__ * a_dim1] = 1.; /* Compute X(i+1:m,i) */ i__2 = *m - i__; i__3 = *n - i__ + 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1011, &a[i__ + 1 + - i__ * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, & - c_b320, &x[i__ + 1 + i__ * x_dim1], &c__1) - ; + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + i__ + * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b29, & + x[i__ + 1 + i__ * x_dim1], &c__1); i__2 = *n - i__ + 1; i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b1011, &y[i__ + y_dim1], - ldy, &a[i__ + i__ * a_dim1], lda, &c_b320, &x[i__ * + dgemv_("Transpose", &i__2, &i__3, &c_b15, &y[i__ + y_dim1], + ldy, &a[i__ + i__ * a_dim1], lda, &c_b29, &x[i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b1011, & - x[i__ + 1 + i__ * x_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__ + 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1011, &a[i__ * - a_dim1 + 
1], lda, &a[i__ + i__ * a_dim1], lda, & - c_b320, &x[i__ * x_dim1 + 1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b29, &x[ + i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b1011, & - x[i__ + 1 + i__ * x_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ + i__ + 1 + i__ * x_dim1], &c__1); i__2 = *m - i__; - sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); /* Update A(i+1:m,i) */ i__2 = *m - i__; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[i__ + 1 + - a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b1011, &a[i__ - + 1 + i__ * a_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b15, &a[i__ + + 1 + i__ * a_dim1], &c__1); i__2 = *m - i__; - sgemv_("No transpose", &i__2, &i__, &c_b1290, &x[i__ + 1 + - x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b1011, & - a[i__ + 1 + i__ * a_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__, &c_b151, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b15, &a[ + i__ + 1 + i__ * a_dim1], &c__1); /* Generate reflection Q(i) to annihilate A(i+2:m,i) */ i__2 = *m - i__; /* Computing MIN */ i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1], &c__1, &tauq[i__]); e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.f; + a[i__ + 1 + i__ * a_dim1] = 1.; /* Compute Y(i+1:n,i) */ i__2 = *m - i__; i__3 = *n - i__; - sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[i__ + 1 + (i__ - + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], & - c__1, &c_b320, &y[i__ + 1 + i__ * y_dim1], &c__1); + 
dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + (i__ + + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, + &c_b29, &y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[i__ + 1 + - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b320, &y[i__ * y_dim1 + 1], &c__1); + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &y[ + i__ * y_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b1011, & - y[i__ + 1 + i__ * y_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b15, &y[ + i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__; - sgemv_("Transpose", &i__2, &i__, &c_b1011, &x[i__ + 1 + - x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b320, &y[i__ * y_dim1 + 1], &c__1); + dgemv_("Transpose", &i__2, &i__, &c_b15, &x[i__ + 1 + x_dim1], + ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &y[ + i__ * y_dim1 + 1], &c__1); i__2 = *n - i__; - sgemv_("Transpose", &i__, &i__2, &c_b1290, &a[(i__ + 1) * - a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b1011, &y[i__ + 1 + i__ * y_dim1], &c__1) - ; + dgemv_("Transpose", &i__, &i__2, &c_b151, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b15, + &y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *n - i__; - sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); } /* L20: */ } } return 0; -/* End of SLABRD */ +/* End of DLABRD */ -} /* slabrd_ */ +} /* dlabrd_ */ -/* Subroutine */ int slacpy_(char *uplo, integer *m, integer *n, real *a, - integer *lda, real *b, integer *ldb) +/* Subroutine */ int dlacpy_(char *uplo, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb) { /* System generated 
locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; @@ -72256,7 +8470,7 @@ L170: Purpose ======= - SLACPY copies all or part of a two-dimensional matrix A to another + DLACPY copies all or part of a two-dimensional matrix A to another matrix B. Arguments @@ -72274,7 +8488,7 @@ L170: N (input) INTEGER The number of columns of the matrix A. N >= 0. - A (input) REAL array, dimension (LDA,N) + A (input) DOUBLE PRECISION array, dimension (LDA,N) The m by n matrix A. If UPLO = 'U', only the upper triangle or trapezoid is accessed; if UPLO = 'L', only the lower triangle or trapezoid is accessed. @@ -72282,7 +8496,7 @@ L170: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). - B (output) REAL array, dimension (LDB,N) + B (output) DOUBLE PRECISION array, dimension (LDB,N) On exit, B = A in the locations specified by UPLO. LDB (input) INTEGER @@ -72334,14 +8548,14 @@ L170: } return 0; -/* End of SLACPY */ +/* End of DLACPY */ -} /* slacpy_ */ +} /* dlacpy_ */ -/* Subroutine */ int sladiv_(real *a, real *b, real *c__, real *d__, real *p, - real *q) +/* Subroutine */ int dladiv_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *p, doublereal *q) { - static real e, f; + static doublereal e, f; /* @@ -72354,7 +8568,7 @@ L170: Purpose ======= - SLADIV performs complex division in real arithmetic + DLADIV performs complex division in real arithmetic a + i*b p + i*q = --------- @@ -72366,21 +8580,21 @@ L170: Arguments ========= - A (input) REAL - B (input) REAL - C (input) REAL - D (input) REAL + A (input) DOUBLE PRECISION + B (input) DOUBLE PRECISION + C (input) DOUBLE PRECISION + D (input) DOUBLE PRECISION The scalars a, b, c, and d in the above expression. - P (output) REAL - Q (output) REAL + P (output) DOUBLE PRECISION + Q (output) DOUBLE PRECISION The scalars p and q in the above expression. 
===================================================================== */ - if (dabs(*d__) < dabs(*c__)) { + if (abs(*d__) < abs(*c__)) { e = *d__ / *c__; f = *c__ + *d__ * e; *p = (*a + *b * e) / f; @@ -72394,20 +8608,21 @@ L170: return 0; -/* End of SLADIV */ +/* End of DLADIV */ -} /* sladiv_ */ +} /* dladiv_ */ -/* Subroutine */ int slae2_(real *a, real *b, real *c__, real *rt1, real *rt2) +/* Subroutine */ int dlae2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2) { /* System generated locals */ - real r__1; + doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static real ab, df, tb, sm, rt, adf, acmn, acmx; + static doublereal ab, df, tb, sm, rt, adf, acmn, acmx; /* @@ -72420,7 +8635,7 @@ L170: Purpose ======= - SLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix + DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix [ A B ] [ B C ]. On return, RT1 is the eigenvalue of larger absolute value, and RT2 @@ -72429,19 +8644,19 @@ L170: Arguments ========= - A (input) REAL + A (input) DOUBLE PRECISION The (1,1) element of the 2-by-2 matrix. - B (input) REAL + B (input) DOUBLE PRECISION The (1,2) and (2,1) elements of the 2-by-2 matrix. - C (input) REAL + C (input) DOUBLE PRECISION The (2,2) element of the 2-by-2 matrix. - RT1 (output) REAL + RT1 (output) DOUBLE PRECISION The eigenvalue of larger absolute value. - RT2 (output) REAL + RT2 (output) DOUBLE PRECISION The eigenvalue of smaller absolute value. 
Further Details @@ -72466,10 +8681,10 @@ L170: sm = *a + *c__; df = *a - *c__; - adf = dabs(df); + adf = abs(df); tb = *b + *b; - ab = dabs(tb); - if (dabs(*a) > dabs(*c__)) { + ab = abs(tb); + if (abs(*a) > abs(*c__)) { acmx = *a; acmn = *c__; } else { @@ -72478,20 +8693,20 @@ L170: } if (adf > ab) { /* Computing 2nd power */ - r__1 = ab / adf; - rt = adf * sqrt(r__1 * r__1 + 1.f); + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); } else if (adf < ab) { /* Computing 2nd power */ - r__1 = adf / ab; - rt = ab * sqrt(r__1 * r__1 + 1.f); + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); } else { /* Includes case AB=ADF=0 */ - rt = ab * sqrt(2.f); + rt = ab * sqrt(2.); } - if (sm < 0.f) { - *rt1 = (sm - rt) * .5f; + if (sm < 0.) { + *rt1 = (sm - rt) * .5; /* Order of execution important. @@ -72500,8 +8715,8 @@ L170: */ *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else if (sm > 0.f) { - *rt1 = (sm + rt) * .5f; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; /* Order of execution important. 
@@ -72514,22 +8729,23 @@ L170: /* Includes case RT1 = RT2 = 0 */ - *rt1 = rt * .5f; - *rt2 = rt * -.5f; + *rt1 = rt * .5; + *rt2 = rt * -.5; } return 0; -/* End of SLAE2 */ +/* End of DLAE2 */ -} /* slae2_ */ +} /* dlae2_ */ -/* Subroutine */ int slaed0_(integer *icompq, integer *qsiz, integer *n, real - *d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs, - real *work, integer *iwork, integer *info) +/* Subroutine */ int dlaed0_(integer *icompq, integer *qsiz, integer *n, + doublereal *d__, doublereal *e, doublereal *q, integer *ldq, + doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, + integer *info) { /* System generated locals */ integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; - real r__1; + doublereal d__1; /* Builtin functions */ double log(doublereal); @@ -72537,31 +8753,35 @@ L170: /* Local variables */ static integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; - static real temp; + static doublereal temp; static integer curr; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer iperm, indxq, iwrem; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - static integer iqptr, tlvls; - extern /* Subroutine */ int slaed1_(integer *, real *, real *, integer *, - integer *, real *, integer *, real *, integer *, integer *), - slaed7_(integer *, integer *, integer *, integer *, integer *, - integer *, real *, real *, integer *, integer *, real *, integer * - , real *, integer *, integer *, integer *, integer *, integer *, - real *, real *, integer *, integer *); + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer iperm; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + 
doublereal *, integer *); + static integer indxq, iwrem; + extern /* Subroutine */ int dlaed1_(integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *); + static integer iqptr; + extern /* Subroutine */ int dlaed7_(integer *, integer *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, doublereal *, integer *, integer *); + static integer tlvls; + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *); static integer igivcl; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - static integer igivnm, submat; - extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, - integer *, real *, integer *); - static integer curprb, subpbs, igivpt, curlvl, matsiz, iprmpt, smlsiz; - extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, - real *, integer *, real *, integer *); + static integer igivnm, submat, curprb, subpbs, igivpt; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + static integer curlvl, matsiz, iprmpt, smlsiz; /* @@ -72574,7 +8794,7 @@ L170: Purpose ======= - SLAED0 computes all eigenvalues and corresponding eigenvectors of a + DLAED0 computes all eigenvalues and corresponding eigenvectors of a symmetric tridiagonal matrix using the divide and conquer method. Arguments @@ -72595,15 +8815,15 @@ L170: N (input) INTEGER The dimension of the symmetric tridiagonal matrix. N >= 0. - D (input/output) REAL array, dimension (N) + D (input/output) DOUBLE PRECISION array, dimension (N) On entry, the main diagonal of the tridiagonal matrix. 
On exit, its eigenvalues. - E (input) REAL array, dimension (N-1) + E (input) DOUBLE PRECISION array, dimension (N-1) The off-diagonal elements of the tridiagonal matrix. On exit, E has been destroyed. - Q (input/output) REAL array, dimension (LDQ, N) + Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) On entry, Q must contain an N-by-N orthogonal matrix. If ICOMPQ = 0 Q is not referenced. If ICOMPQ = 1 On entry, Q is a subset of the columns of the @@ -72619,7 +8839,7 @@ L170: The leading dimension of the array Q. If eigenvectors are desired, then LDQ >= max(1,N). In any case, LDQ >= 1. - QSTORE (workspace) REAL array, dimension (LDQS, N) + QSTORE (workspace) DOUBLE PRECISION array, dimension (LDQS, N) Referenced only when ICOMPQ = 1. Used to store parts of the eigenvector matrix when the updating matrix multiplies take place. @@ -72628,7 +8848,7 @@ L170: The leading dimension of the array QSTORE. If ICOMPQ = 1, then LDQS >= max(1,N). In any case, LDQS >= 1. - WORK (workspace) REAL array, + WORK (workspace) DOUBLE PRECISION array, If ICOMPQ = 0 or 1, the dimension of WORK must be at least 1 + 3*N + 2*N*lg N + 2*N**2 ( lg( N ) = smallest integer k @@ -72679,7 +8899,7 @@ L170: /* Function Body */ *info = 0; - if ((*icompq < 0) || (*icompq > 2)) { + if (*icompq < 0 || *icompq > 2) { *info = -1; } else if (*icompq == 1 && *qsiz < max(0,*n)) { *info = -2; @@ -72692,7 +8912,7 @@ L170: } if (*info != 0) { i__1 = -(*info); - xerbla_("SLAED0", &i__1); + xerbla_("DLAED0", &i__1); return 0; } @@ -72702,7 +8922,7 @@ L170: return 0; } - smlsiz = ilaenv_(&c__9, "SLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( + smlsiz = ilaenv_(&c__9, "DLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( ftnlen)6, (ftnlen)1); /* @@ -72717,7 +8937,7 @@ L10: if (iwork[subpbs] > smlsiz) { for (j = subpbs; j >= 1; --j) { iwork[j * 2] = (iwork[j] + 1) / 2; - iwork[((j) << (1)) - 1] = iwork[j] / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; /* L20: */ } ++tlvls; @@ -72740,12 +8960,12 @@ L10: for (i__ = 1; i__ 
<= i__1; ++i__) { submat = iwork[i__] + 1; smm1 = submat - 1; - d__[smm1] -= (r__1 = e[smm1], dabs(r__1)); - d__[submat] -= (r__1 = e[smm1], dabs(r__1)); + d__[smm1] -= (d__1 = e[smm1], abs(d__1)); + d__[submat] -= (d__1 = e[smm1], abs(d__1)); /* L40: */ } - indxq = ((*n) << (2)) + 3; + indxq = (*n << 2) + 3; if (*icompq != 2) { /* @@ -72753,7 +8973,7 @@ L10: routine */ - temp = log((real) (*n)) / log(2.f); + temp = log((doublereal) (*n)) / log(2.); lgn = (integer) temp; if (pow_ii(&c__2, &lgn) < *n) { ++lgn; @@ -72768,7 +8988,7 @@ L10: igivcl = igivpt + *n * lgn; igivnm = 1; - iq = igivnm + ((*n) << (1)) * lgn; + iq = igivnm + (*n << 1) * lgn; /* Computing 2nd power */ i__1 = *n; iwrem = iq + i__1 * i__1 + 1; @@ -72800,22 +9020,22 @@ L10: matsiz = iwork[i__ + 1] - iwork[i__]; } if (*icompq == 2) { - ssteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + + dsteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + submat * q_dim1], ldq, &work[1], info); if (*info != 0) { goto L130; } } else { - ssteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + + dsteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + iwork[iqptr + curr]], &matsiz, &work[1], info); if (*info != 0) { goto L130; } if (*icompq == 1) { - sgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b1011, &q[submat * - q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]] - , &matsiz, &c_b320, &qstore[submat * qstore_dim1 + 1], - ldqs); + dgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b15, &q[submat * + q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]], + &matsiz, &c_b29, &qstore[submat * qstore_dim1 + 1], + ldqs); } /* Computing 2nd power */ i__2 = matsiz; @@ -72860,19 +9080,19 @@ L80: /* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) into an eigensystem of size MATSIZ. - SLAED1 is used only for the full eigensystem of a tridiagonal + DLAED1 is used only for the full eigensystem of a tridiagonal matrix. 
- SLAED7 handles the cases in which eigenvalues only or eigenvalues + DLAED7 handles the cases in which eigenvalues only or eigenvalues and eigenvectors of a full symmetric matrix (which was reduced to tridiagonal form) are desired. */ if (*icompq == 2) { - slaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], + dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & msd2, &work[1], &iwork[subpbs + 1], info); } else { - slaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ + dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ submat], &qstore[submat * qstore_dim1 + 1], ldqs, & iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] @@ -72902,21 +9122,21 @@ L80: for (i__ = 1; i__ <= i__1; ++i__) { j = iwork[indxq + i__]; work[i__] = d__[j]; - scopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1], &c__1); /* L100: */ } - scopy_(n, &work[1], &c__1, &d__[1], &c__1); + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); } else if (*icompq == 2) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { j = iwork[indxq + i__]; work[i__] = d__[j]; - scopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); + dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); /* L110: */ } - scopy_(n, &work[1], &c__1, &d__[1], &c__1); - slacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + dlacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { @@ -72924,7 +9144,7 @@ L80: work[i__] = d__[j]; /* L120: */ } - scopy_(n, &work[1], &c__1, &d__[1], &c__1); + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); } goto L140; @@ -72934,29 +9154,32 @@ L130: L140: return 0; -/* End of SLAED0 */ +/* End of DLAED0 */ -} /* slaed0_ */ +} /* dlaed0_ */ -/* Subroutine */ int 
slaed1_(integer *n, real *d__, real *q, integer *ldq, - integer *indxq, real *rho, integer *cutpnt, real *work, integer * - iwork, integer *info) +/* Subroutine */ int dlaed1_(integer *n, doublereal *d__, doublereal *q, + integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, + doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer q_dim1, q_offset, i__1, i__2; /* Local variables */ - static integer i__, k, n1, n2, is, iw, iz, iq2, cpp1, indx, indxc, indxp; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *), slaed2_(integer *, integer *, integer *, real *, real - *, integer *, integer *, real *, real *, real *, real *, real *, - integer *, integer *, integer *, integer *, integer *), slaed3_( - integer *, integer *, integer *, real *, real *, integer *, real * - , real *, real *, integer *, integer *, real *, real *, integer *) - ; + static integer i__, k, n1, n2, is, iw, iz, iq2, zpp1, indx, indxc; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer indxp; + extern /* Subroutine */ int dlaed2_(integer *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *, integer *, integer *, integer *), dlaed3_(integer *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + doublereal *, doublereal *, integer *); static integer idlmda; - extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( - integer *, integer *, real *, integer *, integer *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *); static integer coltyp; @@ -72970,10 +9193,10 @@ L140: Purpose ======= - SLAED1 computes the updated eigensystem of a diagonal + DLAED1 computes 
the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix. This routine is used only for the eigenproblem which requires all - eigenvalues and eigenvectors of a tridiagonal matrix. SLAED7 handles + eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles the case in which eigenvalues only or eigenvalues and eigenvectors of a full symmetric matrix (which was reduced to tridiagonal form) are desired. @@ -72990,11 +9213,11 @@ L140: when there are multiple eigenvalues or if there is a zero in the Z vector. For each such occurence the dimension of the secular equation problem is reduced by one. This stage is - performed by the routine SLAED2. + performed by the routine DLAED2. The second stage consists of calculating the updated eigenvalues. This is done by finding the roots of the secular - equation via the routine SLAED4 (as called by SLAED3). + equation via the routine DLAED4 (as called by DLAED3). This routine also calculates the eigenvectors of the current problem. @@ -73009,11 +9232,11 @@ L140: N (input) INTEGER The dimension of the symmetric tridiagonal matrix. N >= 0. - D (input/output) REAL array, dimension (N) + D (input/output) DOUBLE PRECISION array, dimension (N) On entry, the eigenvalues of the rank-1-perturbed matrix. On exit, the eigenvalues of the repaired matrix. - Q (input/output) REAL array, dimension (LDQ,N) + Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) On entry, the eigenvectors of the rank-1-perturbed matrix. On exit, the eigenvectors of the repaired tridiagonal matrix. @@ -73027,14 +9250,14 @@ L140: subproblems back into sorted order, i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. - RHO (input) REAL + RHO (input) DOUBLE PRECISION The subdiagonal entry used to create the rank-1 modification. CUTPNT (input) INTEGER The location of the last eigenvalue in the leading sub-matrix. min(1,N) <= CUTPNT <= N/2. 
- WORK (workspace) REAL array, dimension (4*N + N**2) + WORK (workspace) DOUBLE PRECISION array, dimension (4*N + N**2) IWORK (workspace) INTEGER array, dimension (4*N) @@ -73076,13 +9299,13 @@ L140: } else /* if(complicated condition) */ { /* Computing MIN */ i__1 = 1, i__2 = *n / 2; - if ((min(i__1,i__2) > *cutpnt) || (*n / 2 < *cutpnt)) { + if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { *info = -7; } } if (*info != 0) { i__1 = -(*info); - xerbla_("SLAED1", &i__1); + xerbla_("DLAED1", &i__1); return 0; } @@ -73095,7 +9318,7 @@ L140: /* The following values are integer pointers which indicate the portion of the workspace - used by a particular array in SLAED2 and SLAED3. + used by a particular array in DLAED2 and DLAED3. */ iz = 1; @@ -73114,14 +9337,14 @@ L140: first row of Q_2. */ - scopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); - cpp1 = *cutpnt + 1; + dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); + zpp1 = *cutpnt + 1; i__1 = *n - *cutpnt; - scopy_(&i__1, &q[cpp1 + cpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); + dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); /* Deflate eigenvalues. 
*/ - slaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ + dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ indxc], &iwork[indxp], &iwork[coltyp], info); @@ -73134,7 +9357,7 @@ L140: if (k != 0) { is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; - slaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], + dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ is], info); if (*info != 0) { @@ -73145,7 +9368,7 @@ L140: n1 = k; n2 = *n - k; - slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { @@ -73157,39 +9380,41 @@ L140: L20: return 0; -/* End of SLAED1 */ +/* End of DLAED1 */ -} /* slaed1_ */ +} /* dlaed1_ */ -/* Subroutine */ int slaed2_(integer *k, integer *n, integer *n1, real *d__, - real *q, integer *ldq, integer *indxq, real *rho, real *z__, real * - dlamda, real *w, real *q2, integer *indx, integer *indxc, integer * - indxp, integer *coltyp, integer *info) +/* Subroutine */ int dlaed2_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, + doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, + integer *indx, integer *indxc, integer *indxp, integer *coltyp, + integer *info) { /* System generated locals */ integer q_dim1, q_offset, i__1, i__2; - real r__1, r__2, r__3, r__4; + doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static real c__; + static doublereal c__; static integer i__, j; - static real s, t; + static doublereal s, t; static integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; - static real eps, tau, tol; - static integer psm[4], imax, 
jmax, ctot[4]; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *), sscal_(integer *, real *, real *, - integer *), scopy_(integer *, real *, integer *, real *, integer * - ); - extern doublereal slapy2_(real *, real *), slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer isamax_(integer *, real *, integer *); - extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer - *, integer *, integer *), slacpy_(char *, integer *, integer *, - real *, integer *, real *, integer *); + static doublereal eps, tau, tol; + static integer psm[4], imax, jmax; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer ctot[4]; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dcopy_(integer *, doublereal *, integer *, doublereal + *, integer *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); /* @@ -73202,7 +9427,7 @@ L20: Purpose ======= - SLAED2 merges the two sets of eigenvalues together into a single + DLAED2 merges the two sets of eigenvalues together into a single sorted set. Then it tries to deflate the size of the problem. There are two ways in which deflation can occur: when two or more eigenvalues are close together or if there is a tiny entry in the @@ -73223,13 +9448,13 @@ L20: The location of the last eigenvalue in the leading sub-matrix. min(1,N) <= N1 <= N/2. - D (input/output) REAL array, dimension (N) + D (input/output) DOUBLE PRECISION array, dimension (N) On entry, D contains the eigenvalues of the two submatrices to be combined. 
On exit, D contains the trailing (N-K) updated eigenvalues (those which were deflated) sorted into increasing order. - Q (input/output) REAL array, dimension (LDQ, N) + Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) On entry, Q contains the eigenvectors of two submatrices in the two square blocks with corners at (1,1), (N1,N1) and (N1+1, N1+1), (N,N). @@ -73245,31 +9470,31 @@ L20: half of this permutation must first have N1 added to their values. Destroyed on exit. - RHO (input/output) REAL + RHO (input/output) DOUBLE PRECISION On entry, the off-diagonal element associated with the rank-1 cut which originally split the two submatrices which are now being recombined. On exit, RHO has been modified to the value required by - SLAED3. + DLAED3. - Z (input) REAL array, dimension (N) + Z (input) DOUBLE PRECISION array, dimension (N) On entry, Z contains the updating vector (the last row of the first sub-eigenvector matrix and the first row of the second sub-eigenvector matrix). On exit, the contents of Z have been destroyed by the updating process. - DLAMDA (output) REAL array, dimension (N) + DLAMDA (output) DOUBLE PRECISION array, dimension (N) A copy of the first K eigenvalues which will be used by - SLAED3 to form the secular equation. + DLAED3 to form the secular equation. - W (output) REAL array, dimension (N) + W (output) DOUBLE PRECISION array, dimension (N) The first k values of the final deflation-altered z-vector - which will be passed to SLAED3. + which will be passed to DLAED3. - Q2 (output) REAL array, dimension (N1**2+(N-N1)**2) + Q2 (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) A copy of the first K eigenvectors which will be used by - SLAED3 in a matrix multiply (SGEMM) to solve for the new + DLAED3 in a matrix multiply (DGEMM) to solve for the new eigenvectors. 
INDX (workspace) INTEGER array, dimension (N) @@ -73340,13 +9565,13 @@ L20: } else /* if(complicated condition) */ { /* Computing MIN */ i__1 = 1, i__2 = *n / 2; - if ((min(i__1,i__2) > *n1) || (*n / 2 < *n1)) { + if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { *info = -3; } } if (*info != 0) { i__1 = -(*info); - xerbla_("SLAED2", &i__1); + xerbla_("DLAED2", &i__1); return 0; } @@ -73359,8 +9584,8 @@ L20: n2 = *n - *n1; n1p1 = *n1 + 1; - if (*rho < 0.f) { - sscal_(&n2, &c_b1290, &z__[n1p1], &c__1); + if (*rho < 0.) { + dscal_(&n2, &c_b151, &z__[n1p1], &c__1); } /* @@ -73368,12 +9593,12 @@ L20: two normalized vectors, norm2(z) = sqrt(2). */ - t = 1.f / sqrt(2.f); - sscal_(n, &t, &z__[1], &c__1); + t = 1. / sqrt(2.); + dscal_(n, &t, &z__[1], &c__1); /* RHO = ABS( norm(z)**2 * RHO ) */ - *rho = (r__1 = *rho * 2.f, dabs(r__1)); + *rho = (d__1 = *rho * 2., abs(d__1)); /* Sort the eigenvalues into increasing order */ @@ -73390,7 +9615,7 @@ L20: dlamda[i__] = d__[indxq[i__]]; /* L20: */ } - slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); + dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { indx[i__] = indxq[indxc[i__]]; @@ -73399,13 +9624,13 @@ L20: /* Calculate the allowable deflation tolerance */ - imax = isamax_(n, &z__[1], &c__1); - jmax = isamax_(n, &d__[1], &c__1); - eps = slamch_("Epsilon"); + imax = idamax_(n, &z__[1], &c__1); + jmax = idamax_(n, &d__[1], &c__1); + eps = EPSILON; /* Computing MAX */ - r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs( - r__2)); - tol = eps * 8.f * dmax(r__3,r__4); + d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2)) + ; + tol = eps * 8. * max(d__3,d__4); /* If the rank-1 modifier is small enough, no more needs to be done @@ -73413,19 +9638,19 @@ L20: elements in D. 
*/ - if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { *k = 0; iq2 = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__ = indx[j]; - scopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); dlamda[j] = d__[i__]; iq2 += *n; /* L40: */ } - slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); - scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); + dlacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); + dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); goto L190; } @@ -73454,7 +9679,7 @@ L20: i__1 = *n; for (j = 1; j <= i__1; ++j) { nj = indx[j]; - if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { + if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { /* Deflate due to small z component. */ @@ -73476,7 +9701,7 @@ L80: if (j > *n) { goto L100; } - if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { + if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { /* Deflate due to small z component. */ @@ -73495,32 +9720,32 @@ L80: destructive underflow. */ - tau = slapy2_(&c__, &s); + tau = dlapy2_(&c__, &s); t = d__[nj] - d__[pj]; c__ /= tau; s = -s / tau; - if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { /* Deflation is possible. 
*/ z__[nj] = tau; - z__[pj] = 0.f; + z__[pj] = 0.; if (coltyp[nj] != coltyp[pj]) { coltyp[nj] = 2; } coltyp[pj] = 4; - srot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & + drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & c__, &s); /* Computing 2nd power */ - r__1 = c__; + d__1 = c__; /* Computing 2nd power */ - r__2 = s; - t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); + d__2 = s; + t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); /* Computing 2nd power */ - r__1 = s; + d__1 = s; /* Computing 2nd power */ - r__2 = c__; - d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); + d__2 = c__; + d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); d__[pj] = t; --k2; i__ = 1; @@ -73611,7 +9836,7 @@ L100: i__1 = ctot[0]; for (j = 1; j <= i__1; ++j) { js = indx[i__]; - scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); z__[i__] = d__[js]; ++i__; iq1 += *n1; @@ -73621,8 +9846,8 @@ L100: i__1 = ctot[1]; for (j = 1; j <= i__1; ++j) { js = indx[i__]; - scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); - scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); z__[i__] = d__[js]; ++i__; iq1 += *n1; @@ -73633,7 +9858,7 @@ L100: i__1 = ctot[2]; for (j = 1; j <= i__1; ++j) { js = indx[i__]; - scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); z__[i__] = d__[js]; ++i__; iq2 += n2; @@ -73644,7 +9869,7 @@ L100: i__1 = ctot[3]; for (j = 1; j <= i__1; ++j) { js = indx[i__]; - scopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); iq2 += *n; z__[i__] = d__[js]; ++i__; @@ -73656,11 +9881,11 @@ L100: into the last N - K slots of D and Q respectively. 
*/ - slacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); + dlacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); i__1 = *n - *k; - scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); + dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); -/* Copy CTOT into COLTYP for referencing in SLAED3. */ +/* Copy CTOT into COLTYP for referencing in DLAED3. */ for (j = 1; j <= 4; ++j) { coltyp[j] = ctot[j - 1]; @@ -73670,35 +9895,37 @@ L100: L190: return 0; -/* End of SLAED2 */ +/* End of DLAED2 */ -} /* slaed2_ */ +} /* dlaed2_ */ -/* Subroutine */ int slaed3_(integer *k, integer *n, integer *n1, real *d__, - real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer * - indx, integer *ctot, real *w, real *s, integer *info) +/* Subroutine */ int dlaed3_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda, + doublereal *q2, integer *indx, integer *ctot, doublereal *w, + doublereal *s, integer *info) { /* System generated locals */ integer q_dim1, q_offset, i__1, i__2; - real r__1; + doublereal d__1; /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); + double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ static integer i__, j, n2, n12, ii, n23, iq2; - static real temp; - extern doublereal snrm2_(integer *, real *, integer *); - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *), scopy_(integer *, real *, - integer *, real *, integer *), slaed4_(integer *, integer *, real - *, real *, real *, real *, real *, integer *); - extern doublereal slamc3_(real *, real *); - extern /* Subroutine */ int xerbla_(char *, integer *), slacpy_( - char *, integer *, integer *, real *, integer *, real *, integer * - ), slaset_(char *, integer *, integer *, real *, real *, - real *, integer *); + static doublereal 
temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer + *), dlaed4_(integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *); /* @@ -73711,9 +9938,9 @@ L190: Purpose ======= - SLAED3 finds the roots of the secular equation, as defined by the + DLAED3 finds the roots of the secular equation, as defined by the values in D, W, and RHO, between 1 and K. It makes the - appropriate calls to SLAED4 and then updates the eigenvectors by + appropriate calls to DLAED4 and then updates the eigenvectors by multiplying the matrix of eigenvectors of the pair of eigensystems being combined by the matrix of eigenvectors of the K-by-K system which is solved here. @@ -73730,7 +9957,7 @@ L190: K (input) INTEGER The number of terms in the rational function to be solved by - SLAED4. K >= 0. + DLAED4. K >= 0. N (input) INTEGER The number of rows and columns in the Q matrix. @@ -73740,11 +9967,11 @@ L190: The location of the last eigenvalue in the leading submatrix. min(1,N) <= N1 <= N/2. - D (output) REAL array, dimension (N) + D (output) DOUBLE PRECISION array, dimension (N) D(I) contains the updated eigenvalues for 1 <= I <= K. - Q (output) REAL array, dimension (LDQ,N) + Q (output) DOUBLE PRECISION array, dimension (LDQ,N) Initially the first K columns are used as workspace. On output the columns 1 to K contain the updated eigenvectors. 
@@ -73752,25 +9979,25 @@ L190: LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= max(1,N). - RHO (input) REAL + RHO (input) DOUBLE PRECISION The value of the parameter in the rank one update equation. RHO >= 0 required. - DLAMDA (input/output) REAL array, dimension (K) + DLAMDA (input/output) DOUBLE PRECISION array, dimension (K) The first K elements of this array contain the old roots of the deflated updating problem. These are the poles of the secular equation. May be changed on output by having lowest order bit set to zero on Cray X-MP, Cray Y-MP, Cray-2, or Cray C-90, as described above. - Q2 (input) REAL array, dimension (LDQ2, N) + Q2 (input) DOUBLE PRECISION array, dimension (LDQ2, N) The first K columns of this matrix contain the non-deflated eigenvectors for the split problem. INDX (input) INTEGER array, dimension (N) The permutation used to arrange the columns of the deflated - Q matrix into three groups (see SLAED2). - The rows of the eigenvectors found by SLAED4 must be likewise + Q matrix into three groups (see DLAED2). + The rows of the eigenvectors found by DLAED4 must be likewise permuted before the matrix multiply can take place. CTOT (input) INTEGER array, dimension (4) @@ -73778,12 +10005,12 @@ L190: in Q, as described in INDX. The fourth column type is any column which has been deflated. - W (input/output) REAL array, dimension (K) + W (input/output) DOUBLE PRECISION array, dimension (K) The first K elements of this array contain the components of the deflation-adjusted updating vector. Destroyed on output. - S (workspace) REAL array, dimension (N1 + 1)*K + S (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K Will contain the eigenvectors of the repaired matrix which will be multiplied by the previously accumulated eigenvectors to update the system. 
@@ -73834,7 +10061,7 @@ L190: } if (*info != 0) { i__1 = -(*info); - xerbla_("SLAED3", &i__1); + xerbla_("DLAED3", &i__1); return 0; } @@ -73865,13 +10092,13 @@ L190: i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; + dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; /* L10: */ } i__1 = *k; for (j = 1; j <= i__1; ++j) { - slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], info); /* If the zero finder fails, the computation is terminated. */ @@ -73901,12 +10128,12 @@ L190: /* Compute updated W. */ - scopy_(k, &w[1], &c__1, &s[1], &c__1); + dcopy_(k, &w[1], &c__1, &s[1], &c__1); /* Initialize W(I) = Q(I,I) */ i__1 = *ldq + 1; - scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; @@ -73923,8 +10150,8 @@ L190: } i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { - r__1 = sqrt(-w[i__]); - w[i__] = r_sign(&r__1, &s[i__]); + d__1 = sqrt(-w[i__]); + w[i__] = d_sign(&d__1, &s[i__]); /* L70: */ } @@ -73937,7 +10164,7 @@ L190: s[i__] = w[i__] / q[i__ + j * q_dim1]; /* L80: */ } - temp = snrm2_(k, &s[1], &c__1); + temp = dnrm2_(k, &s[1], &c__1); i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { ii = indx[i__]; @@ -73955,62 +10182,64 @@ L110: n12 = ctot[1] + ctot[2]; n23 = ctot[2] + ctot[3]; - slacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); + dlacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); iq2 = *n1 * n12 + 1; if (n23 != 0) { - sgemm_("N", "N", &n2, k, &n23, &c_b1011, &q2[iq2], &n2, &s[1], &n23, & - c_b320, &q[*n1 + 1 + q_dim1], ldq); + dgemm_("N", "N", &n2, k, &n23, &c_b15, &q2[iq2], &n2, &s[1], &n23, & + c_b29, &q[*n1 + 1 + q_dim1], ldq); } else { - slaset_("A", &n2, k, &c_b320, &c_b320, &q[*n1 + 1 + q_dim1], ldq); + dlaset_("A", &n2, k, &c_b29, &c_b29, &q[*n1 + 1 + q_dim1], ldq); } - 
slacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); + dlacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); if (n12 != 0) { - sgemm_("N", "N", n1, k, &n12, &c_b1011, &q2[1], n1, &s[1], &n12, & - c_b320, &q[q_offset], ldq); + dgemm_("N", "N", n1, k, &n12, &c_b15, &q2[1], n1, &s[1], &n12, &c_b29, + &q[q_offset], ldq); } else { - slaset_("A", n1, k, &c_b320, &c_b320, &q[q_dim1 + 1], ldq); + dlaset_("A", n1, k, &c_b29, &c_b29, &q[q_dim1 + 1], ldq); } L120: return 0; -/* End of SLAED3 */ +/* End of DLAED3 */ -} /* slaed3_ */ +} /* dlaed3_ */ -/* Subroutine */ int slaed4_(integer *n, integer *i__, real *d__, real *z__, - real *delta, real *rho, real *dlam, integer *info) +/* Subroutine */ int dlaed4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, + integer *info) { /* System generated locals */ integer i__1; - real r__1; + doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static real a, b, c__; + static doublereal a, b, c__; static integer j; - static real w; + static doublereal w; static integer ii; - static real dw, zz[3]; + static doublereal dw, zz[3]; static integer ip1; - static real del, eta, phi, eps, tau, psi; + static doublereal del, eta, phi, eps, tau, psi; static integer iim1, iip1; - static real dphi, dpsi; + static doublereal dphi, dpsi; static integer iter; - static real temp, prew, temp1, dltlb, dltub, midpt; + static doublereal temp, prew, temp1, dltlb, dltub, midpt; static integer niter; static logical swtch; - extern /* Subroutine */ int slaed5_(integer *, real *, real *, real *, - real *, real *), slaed6_(integer *, logical *, real *, real *, - real *, real *, real *, integer *); + extern /* Subroutine */ int dlaed5_(integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), dlaed6_(integer *, + logical *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); static logical swtch3; - 
extern doublereal slamch_(char *); + static logical orgati; - static real erretm, rhoinv; + static doublereal erretm, rhoinv; /* @@ -74048,23 +10277,23 @@ L120: I (input) INTEGER The index of the eigenvalue to be computed. 1 <= I <= N. - D (input) REAL array, dimension (N) + D (input) DOUBLE PRECISION array, dimension (N) The original eigenvalues. It is assumed that they are in order, D(I) < D(J) for I < J. - Z (input) REAL array, dimension (N) + Z (input) DOUBLE PRECISION array, dimension (N) The components of the updating vector. - DELTA (output) REAL array, dimension (N) + DELTA (output) DOUBLE PRECISION array, dimension (N) If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th component. If N = 1, then DELTA(1) = 1. The vector DELTA contains the information necessary to construct the eigenvectors. - RHO (input) REAL + RHO (input) DOUBLE PRECISION The scalar in the symmetric updating formula. - DLAM (output) REAL + DLAM (output) DOUBLE PRECISION The computed lambda_I, the I-th updated eigenvalue. INFO (output) INTEGER @@ -74114,18 +10343,18 @@ L120: /* Presumably, I=1 upon entry */ *dlam = d__[1] + *rho * z__[1] * z__[1]; - delta[1] = 1.f; + delta[1] = 1.; return 0; } if (*n == 2) { - slaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); + dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); return 0; } /* Compute machine epsilon */ - eps = slamch_("Epsilon"); - rhoinv = 1.f / *rho; + eps = EPSILON; + rhoinv = 1. / *rho; /* The case I = N */ @@ -74138,7 +10367,7 @@ L120: /* Calculate initial guess */ - midpt = *rho / 2.f; + midpt = *rho / 2.; /* If ||Z||_2 is not one, then TEMP should be set to @@ -74151,7 +10380,7 @@ L120: /* L10: */ } - psi = 0.f; + psi = 0.; i__1 = *n - 2; for (j = 1; j <= i__1; ++j) { psi += z__[j] * z__[j] / delta[j]; @@ -74162,7 +10391,7 @@ L120: w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* n]; - if (w <= 0.f) { + if (w <= 0.) 
{ temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) + z__[*n] * z__[*n] / *rho; if (c__ <= temp) { @@ -74172,10 +10401,10 @@ L120: a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] ; b = z__[*n] * z__[*n] * del; - if (a < 0.f) { - tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); } else { - tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); } } @@ -74190,10 +10419,10 @@ L120: del = d__[*n] - d__[*n - 1]; a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; b = z__[*n] * z__[*n] * del; - if (a < 0.f) { - tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); } else { - tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); } /* @@ -74201,7 +10430,7 @@ L120: D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 */ - dltlb = 0.f; + dltlb = 0.; dltub = midpt; } @@ -74213,9 +10442,9 @@ L120: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; + dpsi = 0.; + psi = 0.; + erretm = 0.; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / delta[j]; @@ -74224,29 +10453,29 @@ L120: erretm += psi; /* L40: */ } - erretm = dabs(erretm); + erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / delta[*n]; phi = z__[*n] * temp; dphi = temp * temp; - erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( - dpsi + dphi); + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); w = rhoinv + phi + psi; /* Test for convergence */ - if (dabs(w) <= eps * erretm) { + if (abs(w) <= eps * erretm) { *dlam = d__[*i__] + tau; goto L250; } - if (w <= 0.f) { - dltlb = dmax(dltlb,tau); + if (w <= 0.) 
{ + dltlb = max(dltlb,tau); } else { - dltub = dmin(dltub,tau); + dltub = min(dltub,tau); } /* Calculate the new step */ @@ -74256,21 +10485,21 @@ L120: a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( dpsi + dphi); b = delta[*n - 1] * delta[*n] * w; - if (c__ < 0.f) { - c__ = dabs(c__); + if (c__ < 0.) { + c__ = abs(c__); } - if (c__ == 0.f) { + if (c__ == 0.) { /* ETA = B/A ETA = RHO - TAU */ eta = dltub - tau; - } else if (a >= 0.f) { - eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( - c__ * 2.f); + } else if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); } else { - eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); } /* @@ -74281,15 +10510,15 @@ L120: will guarantee eta*w < 0. */ - if (w * eta > 0.f) { + if (w * eta > 0.) { eta = -w / (dpsi + dphi); } temp = tau + eta; - if ((temp > dltub) || (temp < dltlb)) { - if (w < 0.f) { - eta = (dltub - tau) / 2.f; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; } else { - eta = (dltlb - tau) / 2.f; + eta = (dltlb - tau) / 2.; } } i__1 = *n; @@ -74302,9 +10531,9 @@ L120: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; + dpsi = 0.; + psi = 0.; + erretm = 0.; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / delta[j]; @@ -74313,15 +10542,15 @@ L120: erretm += psi; /* L60: */ } - erretm = dabs(erretm); + erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / delta[*n]; phi = z__[*n] * temp; dphi = temp * temp; - erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( - dpsi + dphi); + erretm = (-phi - psi) * 8. 
+ erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); w = rhoinv + phi + psi; @@ -74333,15 +10562,15 @@ L120: /* Test for convergence */ - if (dabs(w) <= eps * erretm) { + if (abs(w) <= eps * erretm) { *dlam = d__[*i__] + tau; goto L250; } - if (w <= 0.f) { - dltlb = dmax(dltlb,tau); + if (w <= 0.) { + dltlb = max(dltlb,tau); } else { - dltub = dmin(dltub,tau); + dltub = min(dltub,tau); } /* Calculate the new step */ @@ -74350,12 +10579,12 @@ L120: a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (dpsi + dphi); b = delta[*n - 1] * delta[*n] * w; - if (a >= 0.f) { - eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / - (c__ * 2.f); + if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); } else { - eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); } /* @@ -74366,15 +10595,15 @@ L120: will guarantee eta*w < 0. */ - if (w * eta > 0.f) { + if (w * eta > 0.) { eta = -w / (dpsi + dphi); } temp = tau + eta; - if ((temp > dltub) || (temp < dltlb)) { - if (w < 0.f) { - eta = (dltub - tau) / 2.f; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; } else { - eta = (dltlb - tau) / 2.f; + eta = (dltlb - tau) / 2.; } } i__1 = *n; @@ -74387,9 +10616,9 @@ L120: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; + dpsi = 0.; + psi = 0.; + erretm = 0.; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / delta[j]; @@ -74398,15 +10627,15 @@ L120: erretm += psi; /* L80: */ } - erretm = dabs(erretm); + erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / delta[*n]; phi = z__[*n] * temp; dphi = temp * temp; - erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * - (dpsi + dphi); + erretm = (-phi - psi) * 8. 
+ erretm - phi + rhoinv + abs(tau) * ( + dpsi + dphi); w = rhoinv + phi + psi; /* L90: */ @@ -74430,21 +10659,21 @@ L120: /* Calculate initial guess */ del = d__[ip1] - d__[*i__]; - midpt = del / 2.f; + midpt = del / 2.; i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] = d__[j] - d__[*i__] - midpt; /* L100: */ } - psi = 0.f; + psi = 0.; i__1 = *i__ - 1; for (j = 1; j <= i__1; ++j) { psi += z__[j] * z__[j] / delta[j]; /* L110: */ } - phi = 0.f; + phi = 0.; i__1 = *i__ + 2; for (j = *n; j >= i__1; --j) { phi += z__[j] * z__[j] / delta[j]; @@ -74454,7 +10683,7 @@ L120: w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / delta[ip1]; - if (w > 0.f) { + if (w > 0.) { /* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 @@ -74465,14 +10694,14 @@ L120: orgati = TRUE_; a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; b = z__[*i__] * z__[*i__] * del; - if (a > 0.f) { - tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); + if (a > 0.) { + tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); } else { - tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / - (c__ * 2.f); + tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); } - dltlb = 0.f; + dltlb = 0.; dltub = midpt; } else { @@ -74485,15 +10714,15 @@ L120: orgati = FALSE_; a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; b = z__[ip1] * z__[ip1] * del; - if (a < 0.f) { - tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs( - r__1)))); + if (a < 0.) { + tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( + d__1)))); } else { - tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1)))) - / (c__ * 2.f); + tau = -(a + sqrt((d__1 = a * a + b * 4. 
* c__, abs(d__1)))) / + (c__ * 2.); } dltlb = -midpt; - dltub = 0.f; + dltub = 0.; } if (orgati) { @@ -74519,9 +10748,9 @@ L120: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; + dpsi = 0.; + psi = 0.; + erretm = 0.; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / delta[j]; @@ -74530,12 +10759,12 @@ L120: erretm += psi; /* L150: */ } - erretm = dabs(erretm); + erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ - dphi = 0.f; - phi = 0.f; + dphi = 0.; + phi = 0.; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / delta[j]; @@ -74554,15 +10783,15 @@ L120: swtch3 = FALSE_; if (orgati) { - if (w < 0.f) { + if (w < 0.) { swtch3 = TRUE_; } } else { - if (w > 0.f) { + if (w > 0.) { swtch3 = TRUE_; } } - if ((ii == 1) || (ii == *n)) { + if (ii == 1 || ii == *n) { swtch3 = FALSE_; } @@ -74570,12 +10799,12 @@ L120: dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w += temp; - erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f - + dabs(tau) * dw; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; /* Test for convergence */ - if (dabs(w) <= eps * erretm) { + if (abs(w) <= eps * erretm) { if (orgati) { *dlam = d__[*i__] + tau; } else { @@ -74584,10 +10813,10 @@ L120: goto L250; } - if (w <= 0.f) { - dltlb = dmax(dltlb,tau); + if (w <= 0.) { + dltlb = max(dltlb,tau); } else { - dltub = dmin(dltub,tau); + dltub = min(dltub,tau); } /* Calculate the new step */ @@ -74596,20 +10825,20 @@ L120: if (! 
swtch3) { if (orgati) { /* Computing 2nd power */ - r__1 = z__[*i__] / delta[*i__]; - c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (r__1 * - r__1); + d__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * + d__1); } else { /* Computing 2nd power */ - r__1 = z__[ip1] / delta[ip1]; - c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (r__1 * - r__1); + d__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * + d__1); } a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * dw; b = delta[*i__] * delta[ip1] * w; - if (c__ == 0.f) { - if (a == 0.f) { + if (c__ == 0.) { + if (a == 0.) { if (orgati) { a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * (dpsi + dphi); @@ -74619,12 +10848,12 @@ L120: } } eta = b / a; - } else if (a <= 0.f) { - eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / - (c__ * 2.f); + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); } else { - eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); } } else { @@ -74647,7 +10876,7 @@ L120: zz[2] = z__[iip1] * z__[iip1]; } zz[1] = z__[ii] * z__[ii]; - slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); + dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); if (*info != 0) { goto L250; } @@ -74661,15 +10890,15 @@ L120: will guarantee eta*w < 0. */ - if (w * eta >= 0.f) { + if (w * eta >= 0.) { eta = -w / dw; } temp = tau + eta; - if ((temp > dltub) || (temp < dltlb)) { - if (w < 0.f) { - eta = (dltub - tau) / 2.f; + if (temp > dltub || temp < dltlb) { + if (w < 0.) 
{ + eta = (dltub - tau) / 2.; } else { - eta = (dltlb - tau) / 2.f; + eta = (dltlb - tau) / 2.; } } @@ -74684,9 +10913,9 @@ L120: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; + dpsi = 0.; + psi = 0.; + erretm = 0.; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / delta[j]; @@ -74695,12 +10924,12 @@ L120: erretm += psi; /* L190: */ } - erretm = dabs(erretm); + erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ - dphi = 0.f; - phi = 0.f; + dphi = 0.; + phi = 0.; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / delta[j]; @@ -74714,16 +10943,16 @@ L120: dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f - + (r__1 = tau + eta, dabs(r__1)) * dw; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + ( + d__1 = tau + eta, abs(d__1)) * dw; swtch = FALSE_; if (orgati) { - if (-w > dabs(prew) / 10.f) { + if (-w > abs(prew) / 10.) { swtch = TRUE_; } } else { - if (w > dabs(prew) / 10.f) { + if (w > abs(prew) / 10.) { swtch = TRUE_; } } @@ -74738,7 +10967,7 @@ L120: /* Test for convergence */ - if (dabs(w) <= eps * erretm) { + if (abs(w) <= eps * erretm) { if (orgati) { *dlam = d__[*i__] + tau; } else { @@ -74747,10 +10976,10 @@ L120: goto L250; } - if (w <= 0.f) { - dltlb = dmax(dltlb,tau); + if (w <= 0.) { + dltlb = max(dltlb,tau); } else { - dltub = dmin(dltub,tau); + dltub = min(dltub,tau); } /* Calculate the new step */ @@ -74759,14 +10988,14 @@ L120: if (! 
swtch) { if (orgati) { /* Computing 2nd power */ - r__1 = z__[*i__] / delta[*i__]; + d__1 = z__[*i__] / delta[*i__]; c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( - r__1 * r__1); + d__1 * d__1); } else { /* Computing 2nd power */ - r__1 = z__[ip1] / delta[ip1]; + d__1 = z__[ip1] / delta[ip1]; c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * - (r__1 * r__1); + (d__1 * d__1); } } else { temp = z__[ii] / delta[ii]; @@ -74780,8 +11009,8 @@ L120: a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * dw; b = delta[*i__] * delta[ip1] * w; - if (c__ == 0.f) { - if (a == 0.f) { + if (c__ == 0.) { + if (a == 0.) { if (! swtch) { if (orgati) { a = z__[*i__] * z__[*i__] + delta[ip1] * @@ -74789,1464 +11018,766 @@ L120: } else { a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ *i__] * (dpsi + dphi); - } - } else { - a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] - * delta[ip1] * dphi; - } - } - eta = b / a; - } else if (a <= 0.f) { - eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)) - )) / (c__ * 2.f); - } else { - eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, - dabs(r__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - - temp = rhoinv + psi + phi; - if (swtch) { - c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; - zz[0] = delta[iim1] * delta[iim1] * dpsi; - zz[2] = delta[iip1] * delta[iip1] * dphi; - } else { - if (orgati) { - temp1 = z__[iim1] / delta[iim1]; - temp1 *= temp1; - c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - - d__[iip1]) * temp1; - zz[0] = z__[iim1] * z__[iim1]; - zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + - dphi); - } else { - temp1 = z__[iip1] / delta[iip1]; - temp1 *= temp1; - c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - - d__[iim1]) * temp1; - zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - - temp1)); - zz[2] = z__[iip1] * z__[iip1]; - } - } - slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, - info); - if (*info != 0) { - goto L250; - } 
- } - -/* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.f) { - eta = -w / dw; - } - temp = tau + eta; - if ((temp > dltub) || (temp < dltlb)) { - if (w < 0.f) { - eta = (dltub - tau) / 2.f; - } else { - eta = (dltlb - tau) / 2.f; - } - } - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L210: */ - } - - tau += eta; - prew = w; - -/* Evaluate PSI and the derivative DPSI */ - - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L220: */ - } - erretm = dabs(erretm); - -/* Evaluate PHI and the derivative DPHI */ - - dphi = 0.f; - phi = 0.f; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / delta[j]; - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L230: */ - } - - temp = z__[ii] / delta[ii]; - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * - 3.f + dabs(tau) * dw; - if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) { - swtch = ! swtch; - } - -/* L240: */ - } - -/* Return with INFO = 1, NITER = MAXIT and not converged */ - - *info = 1; - if (orgati) { - *dlam = d__[*i__] + tau; - } else { - *dlam = d__[ip1] + tau; - } - - } - -L250: - - return 0; - -/* End of SLAED4 */ - -} /* slaed4_ */ - -/* Subroutine */ int slaed5_(integer *i__, real *d__, real *z__, real *delta, - real *rho, real *dlam) -{ - /* System generated locals */ - real r__1; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static real b, c__, w, del, tau, temp; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. 
of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 - - - Purpose - ======= - - This subroutine computes the I-th eigenvalue of a symmetric rank-one - modification of a 2-by-2 diagonal matrix - - diag( D ) + RHO * Z * transpose(Z) . - - The diagonal elements in the array D are assumed to satisfy - - D(i) < D(j) for i < j . - - We also assume RHO > 0 and that the Euclidean norm of the vector - Z is one. - - Arguments - ========= - - I (input) INTEGER - The index of the eigenvalue to be computed. I = 1 or I = 2. - - D (input) REAL array, dimension (2) - The original eigenvalues. We assume D(1) < D(2). - - Z (input) REAL array, dimension (2) - The components of the updating vector. - - DELTA (output) REAL array, dimension (2) - The vector DELTA contains the information necessary - to construct the eigenvectors. - - RHO (input) REAL - The scalar in the symmetric updating formula. - - DLAM (output) REAL - The computed lambda_I, the I-th updated eigenvalue. 
- - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --delta; - --z__; - --d__; - - /* Function Body */ - del = d__[2] - d__[1]; - if (*i__ == 1) { - w = *rho * 2.f * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.f; - if (w > 0.f) { - b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[1] * z__[1] * del; - -/* B > ZERO, always */ - - tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1)) - )); - *dlam = d__[1] + tau; - delta[1] = -z__[1] / tau; - delta[2] = z__[2] / (del - tau); - } else { - b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * del; - if (b > 0.f) { - tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f)); - } else { - tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f; - } - *dlam = d__[2] + tau; - delta[1] = -z__[1] / (del + tau); - delta[2] = -z__[2] / tau; - } - temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); - delta[1] /= temp; - delta[2] /= temp; - } else { - -/* Now I=2 */ - - b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * del; - if (b > 0.f) { - tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f; - } else { - tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f)); - } - *dlam = d__[2] + tau; - delta[1] = -z__[1] / (del + tau); - delta[2] = -z__[2] / tau; - temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); - delta[1] /= temp; - delta[2] /= temp; - } - return 0; - -/* End OF SLAED5 */ - -} /* slaed5_ */ - -/* Subroutine */ int slaed6_(integer *kniter, logical *orgati, real *rho, - real *d__, real *z__, real *finit, real *tau, integer *info) -{ - /* Initialized data */ - - static logical first = TRUE_; - - /* System generated locals */ - integer i__1; - real r__1, r__2, r__3, r__4; - - /* Builtin functions */ - double sqrt(doublereal), 
log(doublereal), pow_ri(real *, integer *); - - /* Local variables */ - static real a, b, c__, f; - static integer i__; - static real fc, df, ddf, eta, eps, base; - static integer iter; - static real temp, temp1, temp2, temp3, temp4; - static logical scale; - static integer niter; - static real small1, small2, sminv1, sminv2, dscale[3], sclfac; - extern doublereal slamch_(char *); - static real zscale[3], erretm, sclinv; - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 - - - Purpose - ======= - - SLAED6 computes the positive or negative root (closest to the origin) - of - z(1) z(2) z(3) - f(x) = rho + --------- + ---------- + --------- - d(1)-x d(2)-x d(3)-x - - It is assumed that - - if ORGATI = .true. the root is between d(2) and d(3); - otherwise it is between d(1) and d(2) - - This routine will be called by SLAED4 when necessary. In most cases, - the root sought is the smallest in magnitude, though it might not be - in some extremely rare situations. - - Arguments - ========= - - KNITER (input) INTEGER - Refer to SLAED4 for its significance. - - ORGATI (input) LOGICAL - If ORGATI is true, the needed root is between d(2) and - d(3); otherwise it is between d(1) and d(2). See - SLAED4 for further details. - - RHO (input) REAL - Refer to the equation f(x) above. - - D (input) REAL array, dimension (3) - D satisfies d(1) < d(2) < d(3). - - Z (input) REAL array, dimension (3) - Each of the elements in z must be positive. - - FINIT (input) REAL - The value of f at 0. It is more accurate than the one - evaluated inside this routine (if someone wants to do - so). - - TAU (output) REAL - The root of the equation f(x). 
- - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = 1, failure to converge - - Further Details - =============== - - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== -*/ - - /* Parameter adjustments */ - --z__; - --d__; - - /* Function Body */ - - *info = 0; - - niter = 1; - *tau = 0.f; - if (*kniter == 2) { - if (*orgati) { - temp = (d__[3] - d__[2]) / 2.f; - c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); - a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; - b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; - } else { - temp = (d__[1] - d__[2]) / 2.f; - c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); - a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; - b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; - } -/* Computing MAX */ - r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs( - c__); - temp = dmax(r__1,r__2); - a /= temp; - b /= temp; - c__ /= temp; - if (c__ == 0.f) { - *tau = b / a; - } else if (a <= 0.f) { - *tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( - c__ * 2.f); - } else { - *tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); - } - temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) + - z__[3] / (d__[3] - *tau); - if (dabs(*finit) <= dabs(temp)) { - *tau = 0.f; - } - } + } + } else { + a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] + * delta[ip1] * dphi; + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) + / (c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. 
* c__, + abs(d__1)))); + } + } else { -/* - On first call to routine, get machine parameters for - possible scaling to avoid overflow -*/ +/* Interpolation using THREE most relevant poles */ - if (first) { - eps = slamch_("Epsilon"); - base = slamch_("Base"); - i__1 = (integer) (log(slamch_("SafMin")) / log(base) / 3.f) - ; - small1 = pow_ri(&base, &i__1); - sminv1 = 1.f / small1; - small2 = small1 * small1; - sminv2 = sminv1 * sminv1; - first = FALSE_; - } + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; + zz[0] = delta[iim1] * delta[iim1] * dpsi; + zz[2] = delta[iip1] * delta[iip1] * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] + - d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] + - d__[iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - + temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + } + dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, + info); + if (*info != 0) { + goto L250; + } + } /* - Determine if scaling of inputs necessary to avoid overflow - when computing 1/TEMP**3 + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. 
*/ - if (*orgati) { -/* Computing MIN */ - r__3 = (r__1 = d__[2] - *tau, dabs(r__1)), r__4 = (r__2 = d__[3] - * - tau, dabs(r__2)); - temp = dmin(r__3,r__4); - } else { -/* Computing MIN */ - r__3 = (r__1 = d__[1] - *tau, dabs(r__1)), r__4 = (r__2 = d__[2] - * - tau, dabs(r__2)); - temp = dmin(r__3,r__4); - } - scale = FALSE_; - if (temp <= small1) { - scale = TRUE_; - if (temp <= small2) { + if (w * eta >= 0.) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } -/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L210: */ + } - sclfac = sminv2; - sclinv = small2; - } else { + tau += eta; + prew = w; -/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ +/* Evaluate PSI and the derivative DPSI */ - sclfac = sminv1; - sclinv = small1; - } + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L220: */ + } + erretm = abs(erretm); -/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ +/* Evaluate PHI and the derivative DPHI */ - for (i__ = 1; i__ <= 3; ++i__) { - dscale[i__ - 1] = d__[i__] * sclfac; - zscale[i__ - 1] = z__[i__] * sclfac; -/* L10: */ - } - *tau *= sclfac; - } else { + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L230: */ + } -/* Copy D and Z to DSCALE and ZSCALE */ + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + if (w * prew > 0. && abs(w) > abs(prew) / 10.) { + swtch = ! 
swtch; + } - for (i__ = 1; i__ <= 3; ++i__) { - dscale[i__ - 1] = d__[i__]; - zscale[i__ - 1] = z__[i__]; -/* L20: */ +/* L240: */ } - } - - fc = 0.f; - df = 0.f; - ddf = 0.f; - for (i__ = 1; i__ <= 3; ++i__) { - temp = 1.f / (dscale[i__ - 1] - *tau); - temp1 = zscale[i__ - 1] * temp; - temp2 = temp1 * temp; - temp3 = temp2 * temp; - fc += temp1 / dscale[i__ - 1]; - df += temp2; - ddf += temp3; -/* L30: */ - } - f = *finit + *tau * fc; - - if (dabs(f) <= 0.f) { - goto L60; - } - -/* - Iteration begins - - It is not hard to see that - - 1) Iterations will go up monotonically - if FINIT < 0; - - 2) Iterations will go down monotonically - if FINIT > 0. -*/ - - iter = niter + 1; - - for (niter = iter; niter <= 20; ++niter) { - if (*orgati) { - temp1 = dscale[1] - *tau; - temp2 = dscale[2] - *tau; - } else { - temp1 = dscale[0] - *tau; - temp2 = dscale[1] - *tau; - } - a = (temp1 + temp2) * f - temp1 * temp2 * df; - b = temp1 * temp2 * f; - c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; -/* Computing MAX */ - r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs( - c__); - temp = dmax(r__1,r__2); - a /= temp; - b /= temp; - c__ /= temp; - if (c__ == 0.f) { - eta = b / a; - } else if (a <= 0.f) { - eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( - c__ * 2.f); - } else { - eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); - } - if (f * eta >= 0.f) { - eta = -f / df; - } +/* Return with INFO = 1, NITER = MAXIT and not converged */ - temp = eta + *tau; - if (*orgati) { - if (eta > 0.f && temp >= dscale[2]) { - eta = (dscale[2] - *tau) / 2.f; - } - if (eta < 0.f && temp <= dscale[1]) { - eta = (dscale[1] - *tau) / 2.f; - } + *info = 1; + if (orgati) { + *dlam = d__[*i__] + tau; } else { - if (eta > 0.f && temp >= dscale[1]) { - eta = (dscale[1] - *tau) / 2.f; - } - if (eta < 0.f && temp <= dscale[0]) { - eta = (dscale[0] - *tau) / 2.f; - } + *dlam = d__[ip1] + tau; } - *tau += eta; - fc = 0.f; - erretm = 0.f; - df 
= 0.f; - ddf = 0.f; - for (i__ = 1; i__ <= 3; ++i__) { - temp = 1.f / (dscale[i__ - 1] - *tau); - temp1 = zscale[i__ - 1] * temp; - temp2 = temp1 * temp; - temp3 = temp2 * temp; - temp4 = temp1 / dscale[i__ - 1]; - fc += temp4; - erretm += dabs(temp4); - df += temp2; - ddf += temp3; -/* L40: */ - } - f = *finit + *tau * fc; - erretm = (dabs(*finit) + dabs(*tau) * erretm) * 8.f + dabs(*tau) * df; - if (dabs(f) <= eps * erretm) { - goto L60; - } -/* L50: */ } - *info = 1; -L60: -/* Undo scaling */ +L250: - if (scale) { - *tau *= sclinv; - } return 0; -/* End of SLAED6 */ +/* End of DLAED4 */ -} /* slaed6_ */ +} /* dlaed4_ */ -/* Subroutine */ int slaed7_(integer *icompq, integer *n, integer *qsiz, - integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q, - integer *ldq, integer *indxq, real *rho, integer *cutpnt, real * - qstore, integer *qptr, integer *prmptr, integer *perm, integer * - givptr, integer *givcol, real *givnum, real *work, integer *iwork, - integer *info) +/* Subroutine */ int dlaed5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dlam) { /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; + doublereal d__1; /* Builtin functions */ - integer pow_ii(integer *, integer *); + double sqrt(doublereal); /* Local variables */ - static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr, - indxc; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer indxp; - extern /* Subroutine */ int slaed8_(integer *, integer *, integer *, - integer *, real *, real *, integer *, integer *, real *, integer * - , real *, real *, real *, integer *, real *, integer *, integer *, - integer *, real *, integer *, integer *, integer *), slaed9_( - integer *, integer *, integer *, integer *, real *, real *, - integer *, real *, real *, real *, real *, integer *, 
integer *), - slaeda_(integer *, integer *, integer *, integer *, integer *, - integer *, integer *, integer *, real *, real *, integer *, real * - , real *, integer *); - static integer idlmda; - extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( - integer *, integer *, real *, integer *, integer *, integer *); - static integer coltyp; + static doublereal b, c__, w, del, tau, temp; /* -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University September 30, 1994 Purpose ======= - SLAED7 computes the updated eigensystem of a diagonal - matrix after modification by a rank-one symmetric matrix. This - routine is used only for the eigenproblem which requires all - eigenvalues and optionally eigenvectors of a dense symmetric matrix - that has been reduced to tridiagonal form. SLAED1 handles - the case in which all eigenvalues and eigenvectors of a symmetric - tridiagonal matrix are desired. - - T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) - - where Z = Q'u, u is a vector of length N with ones in the - CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + This subroutine computes the I-th eigenvalue of a symmetric rank-one + modification of a 2-by-2 diagonal matrix - The eigenvectors of the original matrix are stored in Q, and the - eigenvalues are in D. The algorithm consists of three stages: + diag( D ) + RHO * Z * transpose(Z) . - The first stage consists of deflating the size of the problem - when there are multiple eigenvalues or if there is a zero in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine SLAED8. 
+ The diagonal elements in the array D are assumed to satisfy - The second stage consists of calculating the updated - eigenvalues. This is done by finding the roots of the secular - equation via the routine SLAED4 (as called by SLAED9). - This routine also calculates the eigenvectors of the current - problem. + D(i) < D(j) for i < j . - The final stage consists of computing the updated eigenvectors - directly using the updated eigenvalues. The eigenvectors for - the current problem are multiplied with the eigenvectors from - the overall problem. + We also assume RHO > 0 and that the Euclidean norm of the vector + Z is one. Arguments ========= - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - - TLVLS (input) INTEGER - The total number of merging levels in the overall divide and - conquer tree. - - CURLVL (input) INTEGER - The current level in the overall merge routine, - 0 <= CURLVL <= TLVLS. - - CURPBM (input) INTEGER - The current problem in the current level in the overall - merge routine (counting from upper left to lower right). - - D (input/output) REAL array, dimension (N) - On entry, the eigenvalues of the rank-1-perturbed matrix. - On exit, the eigenvalues of the repaired matrix. - - Q (input/output) REAL array, dimension (LDQ, N) - On entry, the eigenvectors of the rank-1-perturbed matrix. - On exit, the eigenvectors of the repaired tridiagonal matrix. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). 
- - INDXQ (output) INTEGER array, dimension (N) - The permutation which will reintegrate the subproblem just - solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) - will be in ascending order. - - RHO (input) REAL - The subdiagonal element used to create the rank-1 - modification. - - CUTPNT (input) INTEGER - Contains the location of the last eigenvalue in the leading - sub-matrix. min(1,N) <= CUTPNT <= N. - - QSTORE (input/output) REAL array, dimension (N**2+1) - Stores eigenvectors of submatrices encountered during - divide and conquer, packed together. QPTR points to - beginning of the submatrices. - - QPTR (input/output) INTEGER array, dimension (N+2) - List of indices pointing to beginning of submatrices stored - in QSTORE. The submatrices are numbered starting at the - bottom left of the divide and conquer tree, from left to - right and bottom to top. - - PRMPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in PERM a - level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) - indicates the size of the permutation and also the size of - the full, non-deflated problem. - - PERM (input) INTEGER array, dimension (N lg N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in GIVCOL a - level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) - indicates the number of Givens rotations. + I (input) INTEGER + The index of the eigenvalue to be computed. I = 1 or I = 2. - GIVCOL (input) INTEGER array, dimension (2, N lg N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. + D (input) DOUBLE PRECISION array, dimension (2) + The original eigenvalues. We assume D(1) < D(2). - GIVNUM (input) REAL array, dimension (2, N lg N) - Each number indicates the S value to be used in the - corresponding Givens rotation. 
+ Z (input) DOUBLE PRECISION array, dimension (2) + The components of the updating vector. - WORK (workspace) REAL array, dimension (3*N+QSIZ*N) + DELTA (output) DOUBLE PRECISION array, dimension (2) + The vector DELTA contains the information necessary + to construct the eigenvectors. - IWORK (workspace) INTEGER array, dimension (4*N) + RHO (input) DOUBLE PRECISION + The scalar in the symmetric updating formula. - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge + DLAM (output) DOUBLE PRECISION + The computed lambda_I, the I-th updated eigenvalue. Further Details =============== Based on contributions by - Jeff Rutter, Computer Science Division, University of California + Ren-Cang Li, Computer Science Division, University of California at Berkeley, USA ===================================================================== - - - Test the input parameters. */ + /* Parameter adjustments */ + --delta; + --z__; --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --qstore; - --qptr; - --prmptr; - --perm; - --givptr; - givcol -= 3; - givnum -= 3; - --work; - --iwork; /* Function Body */ - *info = 0; - - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*icompq == 1 && *qsiz < *n) { - *info = -4; - } else if (*ldq < max(1,*n)) { - *info = -9; - } else if ((min(1,*n) > *cutpnt) || (*n < *cutpnt)) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAED7", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in SLAED8 and SLAED9. 
-*/ - - if (*icompq == 1) { - ldq2 = *qsiz; - } else { - ldq2 = *n; - } - - iz = 1; - idlmda = iz + *n; - iw = idlmda + *n; - iq2 = iw + *n; - is = iq2 + *n * ldq2; - - indx = 1; - indxc = indx + *n; - coltyp = indxc + *n; - indxp = coltyp + *n; - -/* - Form the z-vector which consists of the last row of Q_1 and the - first row of Q_2. -*/ - - ptr = pow_ii(&c__2, tlvls) + 1; - i__1 = *curlvl - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *tlvls - i__; - ptr += pow_ii(&c__2, &i__2); -/* L10: */ - } - curr = ptr + *curpbm; - slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & - givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz - + *n], info); - -/* - When solving the final problem, we no longer need the stored data, - so we will overwrite the data from this level onto the previously - used storage space. -*/ - - if (*curlvl == *tlvls) { - qptr[curr] = 1; - prmptr[curr] = 1; - givptr[curr] = 1; - } - -/* Sort and Deflate eigenvalues. */ - - slaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, - cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & - perm[prmptr[curr]], &givptr[curr + 1], &givcol[((givptr[curr]) << - (1)) + 1], &givnum[((givptr[curr]) << (1)) + 1], &iwork[indxp], & - iwork[indx], info); - prmptr[curr + 1] = prmptr[curr] + *n; - givptr[curr + 1] += givptr[curr]; + del = d__[2] - d__[1]; + if (*i__ == 1) { + w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.; + if (w > 0.) { + b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * del; -/* Solve Secular Equation. */ +/* B > ZERO, always */ - if (k != 0) { - slaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], - &work[iw], &qstore[qptr[curr]], &k, info); - if (*info != 0) { - goto L30; - } - if (*icompq == 1) { - sgemm_("N", "N", qsiz, &k, &k, &c_b1011, &work[iq2], &ldq2, & - qstore[qptr[curr]], &k, &c_b320, &q[q_offset], ldq); + tau = c__ * 2. 
/ (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); + *dlam = d__[1] + tau; + delta[1] = -z__[1] / tau; + delta[2] = z__[2] / (del - tau); + } else { + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.) { + tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); + } else { + tau = (b - sqrt(b * b + c__ * 4.)) / 2.; + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; } -/* Computing 2nd power */ - i__1 = k; - qptr[curr + 1] = qptr[curr] + i__1 * i__1; - -/* Prepare the INDXQ sorting permutation. */ - - n1 = k; - n2 = *n - k; - slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); - } else { - qptr[curr + 1] = qptr[curr]; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indxq[i__] = i__; -/* L20: */ + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } else { + +/* Now I=2 */ + + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.) { + tau = (b + sqrt(b * b + c__ * 4.)) / 2.; + } else { + tau = c__ * 2. 
/ (-b + sqrt(b * b + c__ * 4.)); } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; } - -L30: return 0; -/* End of SLAED7 */ +/* End OF DLAED5 */ -} /* slaed7_ */ +} /* dlaed5_ */ -/* Subroutine */ int slaed8_(integer *icompq, integer *k, integer *n, integer - *qsiz, real *d__, real *q, integer *ldq, integer *indxq, real *rho, - integer *cutpnt, real *z__, real *dlamda, real *q2, integer *ldq2, - real *w, integer *perm, integer *givptr, integer *givcol, real * - givnum, integer *indxp, integer *indx, integer *info) +/* Subroutine */ int dlaed6_(integer *kniter, logical *orgati, doublereal * + rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * + tau, integer *info) { + /* Initialized data */ + + static logical first = TRUE_; + /* System generated locals */ - integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; - real r__1; + integer i__1; + doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ - double sqrt(doublereal); + double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *); /* Local variables */ - static real c__; - static integer i__, j; - static real s, t; - static integer k2, n1, n2, jp, n1p1; - static real eps, tau, tol; - static integer jlam, imax, jmax; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *), sscal_(integer *, real *, real *, - integer *), scopy_(integer *, real *, integer *, real *, integer * - ); - extern doublereal slapy2_(real *, real *), slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer isamax_(integer *, real *, integer *); - extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer - *, integer *, integer *), slacpy_(char *, integer *, integer *, - real *, integer *, real *, integer *); + static doublereal a, b, c__, f; + static integer i__; + static 
doublereal fc, df, ddf, eta, eps, base; + static integer iter; + static doublereal temp, temp1, temp2, temp3, temp4; + static logical scale; + static integer niter; + static doublereal small1, small2, sminv1, sminv2; + + static doublereal dscale[3], sclfac, zscale[3], erretm, sclinv; /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, Courant Institute, NAG Ltd., and Rice University - September 30, 1994 + June 30, 1999 Purpose ======= - SLAED8 merges the two sets of eigenvalues together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - eigenvalues are close together or if there is a tiny element in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - Arguments - ========= - - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - - K (output) INTEGER - The number of non-deflated eigenvalues, and the order of the - related secular equation. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - - D (input/output) REAL array, dimension (N) - On entry, the eigenvalues of the two submatrices to be - combined. On exit, the trailing (N-K) updated eigenvalues - (those which were deflated) sorted into increasing order. - - Q (input/output) REAL array, dimension (LDQ,N) - If ICOMPQ = 0, Q is not referenced. Otherwise, - on entry, Q contains the eigenvectors of the partially solved - system which has been previously updated in matrix - multiplies with other partially solved eigensystems. 
- On exit, Q contains the trailing (N-K) updated eigenvectors - (those which were deflated) in its last N-K columns. - - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). - - INDXQ (input) INTEGER array, dimension (N) - The permutation which separately sorts the two sub-problems - in D into ascending order. Note that elements in the second - half of this permutation must first have CUTPNT added to - their values in order to be accurate. - - RHO (input/output) REAL - On entry, the off-diagonal element associated with the rank-1 - cut which originally split the two submatrices which are now - being recombined. - On exit, RHO has been modified to the value required by - SLAED3. - - CUTPNT (input) INTEGER - The location of the last eigenvalue in the leading - sub-matrix. min(1,N) <= CUTPNT <= N. + DLAED6 computes the positive or negative root (closest to the origin) + of + z(1) z(2) z(3) + f(x) = rho + --------- + ---------- + --------- + d(1)-x d(2)-x d(3)-x - Z (input) REAL array, dimension (N) - On entry, Z contains the updating vector (the last row of - the first sub-eigenvector matrix and the first row of the - second sub-eigenvector matrix). - On exit, the contents of Z are destroyed by the updating - process. + It is assumed that - DLAMDA (output) REAL array, dimension (N) - A copy of the first K eigenvalues which will be used by - SLAED3 to form the secular equation. + if ORGATI = .true. the root is between d(2) and d(3); + otherwise it is between d(1) and d(2) - Q2 (output) REAL array, dimension (LDQ2,N) - If ICOMPQ = 0, Q2 is not referenced. Otherwise, - a copy of the first K eigenvectors which will be used by - SLAED7 in a matrix multiply (SGEMM) to update the new - eigenvectors. + This routine will be called by DLAED4 when necessary. In most cases, + the root sought is the smallest in magnitude, though it might not be + in some extremely rare situations. - LDQ2 (input) INTEGER - The leading dimension of the array Q2. 
LDQ2 >= max(1,N). + Arguments + ========= - W (output) REAL array, dimension (N) - The first k values of the final deflation-altered z-vector and - will be passed to SLAED3. + KNITER (input) INTEGER + Refer to DLAED4 for its significance. - PERM (output) INTEGER array, dimension (N) - The permutations (from deflation and sorting) to be applied - to each eigenblock. + ORGATI (input) LOGICAL + If ORGATI is true, the needed root is between d(2) and + d(3); otherwise it is between d(1) and d(2). See + DLAED4 for further details. - GIVPTR (output) INTEGER - The number of Givens rotations which took place in this - subproblem. + RHO (input) DOUBLE PRECISION + Refer to the equation f(x) above. - GIVCOL (output) INTEGER array, dimension (2, N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. + D (input) DOUBLE PRECISION array, dimension (3) + D satisfies d(1) < d(2) < d(3). - GIVNUM (output) REAL array, dimension (2, N) - Each number indicates the S value to be used in the - corresponding Givens rotation. + Z (input) DOUBLE PRECISION array, dimension (3) + Each of the elements in z must be positive. - INDXP (workspace) INTEGER array, dimension (N) - The permutation used to place deflated values of D at the end - of the array. INDXP(1:K) points to the nondeflated D-values - and INDXP(K+1:N) points to the deflated eigenvalues. + FINIT (input) DOUBLE PRECISION + The value of f at 0. It is more accurate than the one + evaluated inside this routine (if someone wants to do + so). - INDX (workspace) INTEGER array, dimension (N) - The permutation used to sort the contents of D into ascending - order. + TAU (output) DOUBLE PRECISION + The root of the equation f(x). - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. 
+ INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, failure to converge Further Details =============== Based on contributions by - Jeff Rutter, Computer Science Division, University of California + Ren-Cang Li, Computer Science Division, University of California at Berkeley, USA ===================================================================== - - - Test the input parameters. */ /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; --z__; - --dlamda; - q2_dim1 = *ldq2; - q2_offset = 1 + q2_dim1; - q2 -= q2_offset; - --w; - --perm; - givcol -= 3; - givnum -= 3; - --indxp; - --indx; + --d__; /* Function Body */ - *info = 0; - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*n < 0) { - *info = -3; - } else if (*icompq == 1 && *qsiz < *n) { - *info = -4; - } else if (*ldq < max(1,*n)) { - *info = -7; - } else if ((*cutpnt < min(1,*n)) || (*cutpnt > *n)) { - *info = -10; - } else if (*ldq2 < max(1,*n)) { - *info = -14; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAED8", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - n1 = *cutpnt; - n2 = *n - n1; - n1p1 = n1 + 1; + *info = 0; - if (*rho < 0.f) { - sscal_(&n2, &c_b1290, &z__[n1p1], &c__1); + niter = 1; + *tau = 0.; + if (*kniter == 2) { + if (*orgati) { + temp = (d__[3] - d__[2]) / 2.; + c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); + a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; + b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; + } else { + temp = (d__[1] - d__[2]) / 2.; + c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); + a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; + b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; + } +/* Computing MAX */ + d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); + temp = max(d__1,d__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.) { + *tau = b / a; + } else if (a <= 0.) 
{ + *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)) + )); + } + temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) + + z__[3] / (d__[3] - *tau); + if (abs(*finit) <= abs(temp)) { + *tau = 0.; + } } -/* Normalize z so that norm(z) = 1 */ +/* + On first call to routine, get machine parameters for + possible scaling to avoid overflow +*/ - t = 1.f / sqrt(2.f); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - indx[j] = j; -/* L10: */ + if (first) { + eps = EPSILON; + base = BASE; + i__1 = (integer) (log(SAFEMINIMUM) / log(base) / 3.); + small1 = pow_di(&base, &i__1); + sminv1 = 1. / small1; + small2 = small1 * small1; + sminv2 = sminv1 * sminv1; + first = FALSE_; } - sscal_(n, &t, &z__[1], &c__1); - *rho = (r__1 = *rho * 2.f, dabs(r__1)); -/* Sort the eigenvalues into increasing order */ +/* + Determine if scaling of inputs necessary to avoid overflow + when computing 1/TEMP**3 +*/ - i__1 = *n; - for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { - indxq[i__] += *cutpnt; -/* L20: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = d__[indxq[i__]]; - w[i__] = z__[indxq[i__]]; -/* L30: */ - } - i__ = 1; - j = *cutpnt + 1; - slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = dlamda[indx[i__]]; - z__[i__] = w[indx[i__]]; -/* L40: */ + if (*orgati) { +/* Computing MIN */ + d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - * + tau, abs(d__2)); + temp = min(d__3,d__4); + } else { +/* Computing MIN */ + d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - * + tau, abs(d__2)); + temp = min(d__3,d__4); } + scale = FALSE_; + if (temp <= small1) { + scale = TRUE_; + if (temp <= small2) { -/* Calculate the allowable deflation tolerence */ +/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ - imax = isamax_(n, &z__[1], &c__1); - jmax = 
isamax_(n, &d__[1], &c__1); - eps = slamch_("Epsilon"); - tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1)); + sclfac = sminv2; + sclinv = small2; + } else { -/* - If the rank-1 modifier is small enough, no more needs to be done - except to reorganize Q so that its columns correspond with the - elements in D. -*/ +/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ - if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { - *k = 0; - if (*icompq == 0) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - perm[j] = indxq[indx[j]]; -/* L50: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - perm[j] = indxq[indx[j]]; - scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 - + 1], &c__1); -/* L60: */ - } - slacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + sclfac = sminv1; + sclinv = small1; } - return 0; - } -/* - If there are multiple eigenvalues then the problem deflates. Here - the number of equal eigenvalues are found. As each equal - eigenvalue is found, an elementary reflector is computed to rotate - the corresponding eigensubspace so that the corresponding - components of Z are zero in this new basis. -*/ +/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ - *k = 0; - *givptr = 0; - k2 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__] * sclfac; + zscale[i__ - 1] = z__[i__] * sclfac; +/* L10: */ + } + *tau *= sclfac; + } else { -/* Deflate due to small z component. */ +/* Copy D and Z to DSCALE and ZSCALE */ - --k2; - indxp[k2] = j; - if (j == *n) { - goto L110; - } - } else { - jlam = j; - goto L80; + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__]; + zscale[i__ - 1] = z__[i__]; +/* L20: */ } -/* L70: */ } -L80: - ++j; - if (j > *n) { - goto L100; + + fc = 0.; + df = 0.; + ddf = 0.; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1. 
/ (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + fc += temp1 / dscale[i__ - 1]; + df += temp2; + ddf += temp3; +/* L30: */ } - if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { + f = *finit + *tau * fc; -/* Deflate due to small z component. */ + if (abs(f) <= 0.) { + goto L60; + } - --k2; - indxp[k2] = j; - } else { +/* + Iteration begins -/* Check if eigenvalues are close enough to allow deflation. */ + It is not hard to see that - s = z__[jlam]; - c__ = z__[j]; + 1) Iterations will go up monotonically + if FINIT < 0; -/* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. + 2) Iterations will go down monotonically + if FINIT > 0. */ - tau = slapy2_(&c__, &s); - t = d__[j] - d__[jlam]; - c__ /= tau; - s = -s / tau; - if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { - -/* Deflation is possible. */ + iter = niter + 1; - z__[j] = tau; - z__[jlam] = 0.f; + for (niter = iter; niter <= 20; ++niter) { -/* Record the appropriate Givens rotation */ + if (*orgati) { + temp1 = dscale[1] - *tau; + temp2 = dscale[2] - *tau; + } else { + temp1 = dscale[0] - *tau; + temp2 = dscale[1] - *tau; + } + a = (temp1 + temp2) * f - temp1 * temp2 * df; + b = temp1 * temp2 * f; + c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; +/* Computing MAX */ + d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); + temp = max(d__1,d__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.) { + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + if (f * eta >= 0.) 
{ + eta = -f / df; + } - ++(*givptr); - givcol[((*givptr) << (1)) + 1] = indxq[indx[jlam]]; - givcol[((*givptr) << (1)) + 2] = indxq[indx[j]]; - givnum[((*givptr) << (1)) + 1] = c__; - givnum[((*givptr) << (1)) + 2] = s; - if (*icompq == 1) { - srot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ - indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + temp = eta + *tau; + if (*orgati) { + if (eta > 0. && temp >= dscale[2]) { + eta = (dscale[2] - *tau) / 2.; } - t = d__[jlam] * c__ * c__ + d__[j] * s * s; - d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; - d__[jlam] = t; - --k2; - i__ = 1; -L90: - if (k2 + i__ <= *n) { - if (d__[jlam] < d__[indxp[k2 + i__]]) { - indxp[k2 + i__ - 1] = indxp[k2 + i__]; - indxp[k2 + i__] = jlam; - ++i__; - goto L90; - } else { - indxp[k2 + i__ - 1] = jlam; - } - } else { - indxp[k2 + i__ - 1] = jlam; + if (eta < 0. && temp <= dscale[1]) { + eta = (dscale[1] - *tau) / 2.; } - jlam = j; } else { - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - jlam = j; + if (eta > 0. && temp >= dscale[1]) { + eta = (dscale[1] - *tau) / 2.; + } + if (eta < 0. && temp <= dscale[0]) { + eta = (dscale[0] - *tau) / 2.; + } } - } - goto L80; -L100: - -/* Record the last eigenvalue. */ - - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - -L110: - -/* - Sort the eigenvalues and corresponding eigenvectors into DLAMDA - and Q2 respectively. The eigenvalues/vectors which were not - deflated go into the first K slots of DLAMDA and Q2 respectively, - while those which were deflated go into the last N - K slots. -*/ - - if (*icompq == 0) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - jp = indxp[j]; - dlamda[j] = d__[jp]; - perm[j] = indxq[indx[jp]]; -/* L120: */ + *tau += eta; + + fc = 0.; + erretm = 0.; + df = 0.; + ddf = 0.; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1. 
/ (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + temp4 = temp1 / dscale[i__ - 1]; + fc += temp4; + erretm += abs(temp4); + df += temp2; + ddf += temp3; +/* L40: */ } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - jp = indxp[j]; - dlamda[j] = d__[jp]; - perm[j] = indxq[indx[jp]]; - scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] - , &c__1); -/* L130: */ + f = *finit + *tau * fc; + erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df; + if (abs(f) <= eps * erretm) { + goto L60; } +/* L50: */ } + *info = 1; +L60: -/* - The deflated eigenvalues and their corresponding vectors go back - into the last N - K slots of D and Q respectively. -*/ +/* Undo scaling */ - if (*k < *n) { - if (*icompq == 0) { - i__1 = *n - *k; - scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); - } else { - i__1 = *n - *k; - scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); - i__1 = *n - *k; - slacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* - k + 1) * q_dim1 + 1], ldq); - } + if (scale) { + *tau *= sclinv; } - return 0; -/* End of SLAED8 */ +/* End of DLAED6 */ -} /* slaed8_ */ +} /* dlaed6_ */ -/* Subroutine */ int slaed9_(integer *k, integer *kstart, integer *kstop, - integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, - real *w, real *s, integer *lds, integer *info) +/* Subroutine */ int dlaed7_(integer *icompq, integer *n, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, + doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer + *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * + perm, integer *givptr, integer *givcol, doublereal *givnum, + doublereal *work, integer *iwork, integer *info) { /* System generated locals */ - integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; - real r__1; + integer q_dim1, q_offset, i__1, i__2; /* Builtin functions */ - double 
sqrt(doublereal), r_sign(real *, real *); + integer pow_ii(integer *, integer *); /* Local variables */ - static integer i__, j; - static real temp; - extern doublereal snrm2_(integer *, real *, integer *); - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *), slaed4_(integer *, integer *, real *, real *, real *, - real *, real *, integer *); - extern doublereal slamc3_(real *, real *); - extern /* Subroutine */ int xerbla_(char *, integer *); + static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer indxc, indxp; + extern /* Subroutine */ int dlaed8_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *, integer *, integer *), dlaed9_(integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *), dlaeda_(integer *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, doublereal *, integer *, doublereal *, doublereal *, integer *) + ; + static integer idlmda; + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *); + static integer coltyp; /* -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= - SLAED9 finds the roots of the secular equation, as defined by the - values in D, Z, and RHO, between KSTART and KSTOP. It makes the - appropriate calls to SLAED4 and then stores the new matrix of - eigenvectors for use in calculating the next level of Z vectors. + DLAED7 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and optionally eigenvectors of a dense symmetric matrix + that has been reduced to tridiagonal form. DLAED1 handles + the case in which all eigenvalues and eigenvectors of a symmetric + tridiagonal matrix are desired. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLAED8. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine DLAED4 (as called by DLAED9). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. 
Arguments ========= - K (input) INTEGER - The number of terms in the rational function to be solved by - SLAED4. K >= 0. + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. - KSTART (input) INTEGER - KSTOP (input) INTEGER - The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP - are to be computed. 1 <= KSTART <= KSTOP <= K. + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. - N (input) INTEGER - The number of rows and columns in the Q matrix. - N >= K (delation may result in N > K). + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - D (output) REAL array, dimension (N) - D(I) contains the updated eigenvalues - for KSTART <= I <= KSTOP. + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. - Q (workspace) REAL array, dimension (LDQ,N) + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= CURLVL <= TLVLS. - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max( 1, N ). + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). - RHO (input) REAL - The value of the parameter in the rank one update equation. - RHO >= 0 required. + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. - DLAMDA (input) REAL array, dimension (K) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. + Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) + On entry, the eigenvectors of the rank-1-perturbed matrix. 
+ On exit, the eigenvectors of the repaired tridiagonal matrix. - W (input) REAL array, dimension (K) - The first K elements of this array contain the components - of the deflation-adjusted updating vector. + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). - S (output) REAL array, dimension (LDS, K) - Will contain the eigenvectors of the repaired matrix which - will be stored for subsequent Z vector calculation and - multiplied by the previously accumulated eigenvectors - to update the system. + INDXQ (output) INTEGER array, dimension (N) + The permutation which will reintegrate the subproblem just + solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) + will be in ascending order. - LDS (input) INTEGER - The leading dimension of S. LDS >= max( 1, K ). + RHO (input) DOUBLE PRECISION + The subdiagonal element used to create the rank-1 + modification. - INFO (output) INTEGER + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) + Stores eigenvectors of submatrices encountered during + divide and conquer, packed together. QPTR points to + beginning of the submatrices. + + QPTR (input/output) INTEGER array, dimension (N+2) + List of indices pointing to beginning of submatrices stored + in QSTORE. The submatrices are numbered starting at the + bottom left of the divide and conquer tree, from left to + right and bottom to top. + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and also the size of + the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. 
+ + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + WORK (workspace) DOUBLE PRECISION array, dimension (3*N+QSIZ*N) + + IWORK (workspace) INTEGER array, dimension (4*N) + + INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = 1, an eigenvalue did not converge @@ -76269,237 +11800,292 @@ L110: q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; - --dlamda; - --w; - s_dim1 = *lds; - s_offset = 1 + s_dim1; - s -= s_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --iwork; /* Function Body */ *info = 0; - if (*k < 0) { + if (*icompq < 0 || *icompq > 1) { *info = -1; - } else if ((*kstart < 1) || (*kstart > max(1,*k))) { + } else if (*n < 0) { *info = -2; - } else if ((max(1,*kstop) < *kstart) || (*kstop > max(1,*k))) { - *info = -3; - } else if (*n < *k) { + } else if (*icompq == 1 && *qsiz < *n) { *info = -4; - } else if (*ldq < max(1,*k)) { - *info = -7; - } else if (*lds < max(1,*k)) { + } else if (*ldq < max(1,*n)) { + *info = -9; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { *info = -12; } if (*info != 0) { i__1 = -(*info); - xerbla_("SLAED9", &i__1); + xerbla_("DLAED7", &i__1); return 0; } /* Quick return if possible */ - if (*k == 0) { + if (*n == 0) { return 0; } /* - Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can - be computed with high relative accuracy (barring over/underflow). 
- This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). - The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), - which on any of these machines zeros out the bottommost - bit of DLAMDA(I) if it is 1; this makes the subsequent - subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DLAMDA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DLAMDA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in DLAED8 and DLAED9. */ - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; -/* L10: */ + if (*icompq == 1) { + ldq2 = *qsiz; + } else { + ldq2 = *n; } - i__1 = *kstop; - for (j = *kstart; j <= i__1; ++j) { - slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], - info); + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + is = iq2 + *n * ldq2; -/* If the zero finder fails, the computation is terminated. */ + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; - if (*info != 0) { - goto L120; - } -/* L20: */ +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. 
+*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ } + curr = ptr + *curpbm; + dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz + + *n], info); - if ((*k == 1) || (*k == 2)) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *k; - for (j = 1; j <= i__2; ++j) { - s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; -/* L30: */ - } -/* L40: */ - } - goto L120; +/* + When solving the final problem, we no longer need the stored data, + so we will overwrite the data from this level onto the previously + used storage space. +*/ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; } -/* Compute updated W. */ +/* Sort and Deflate eigenvalues. */ - scopy_(k, &w[1], &c__1, &s[s_offset], &c__1); + dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, + cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & + perm[prmptr[curr]], &givptr[curr + 1], &givcol[(givptr[curr] << 1) + + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp], &iwork[ + indx], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; -/* Initialize W(I) = Q(I,I) */ +/* Solve Secular Equation. 
*/ - i__1 = *ldq + 1; - scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L50: */ + if (k != 0) { + dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], + &work[iw], &qstore[qptr[curr]], &k, info); + if (*info != 0) { + goto L30; } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); -/* L60: */ + if (*icompq == 1) { + dgemm_("N", "N", qsiz, &k, &k, &c_b15, &work[iq2], &ldq2, &qstore[ + qptr[curr]], &k, &c_b29, &q[q_offset], ldq); } -/* L70: */ - } - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - r__1 = sqrt(-w[i__]); - w[i__] = r_sign(&r__1, &s[i__ + s_dim1]); -/* L80: */ - } +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; -/* Compute eigenvectors of the modified rank-1 modification. */ +/* Prepare the INDXQ sorting permutation. 
*/ - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; -/* L90: */ - } - temp = snrm2_(k, &q[j * q_dim1 + 1], &c__1); - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; -/* L100: */ + n1 = k; + n2 = *n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ } -/* L110: */ } -L120: +L30: return 0; -/* End of SLAED9 */ +/* End of DLAED7 */ -} /* slaed9_ */ +} /* dlaed7_ */ -/* Subroutine */ int slaeda_(integer *n, integer *tlvls, integer *curlvl, - integer *curpbm, integer *prmptr, integer *perm, integer *givptr, - integer *givcol, real *givnum, real *q, integer *qptr, real *z__, - real *ztemp, integer *info) +/* Subroutine */ int dlaed8_(integer *icompq, integer *k, integer *n, integer + *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, + doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, + doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer + *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer + *indx, integer *info) { /* System generated locals */ - integer i__1, i__2, i__3; + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + doublereal d__1; /* Builtin functions */ - integer pow_ii(integer *, integer *); double sqrt(doublereal); /* Local variables */ - static integer i__, k, mid, ptr, curr; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *); - static integer bsiz1, bsiz2, psiz1, psiz2, zptr1; - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), - xerbla_(char *, integer *); + static doublereal c__; + static integer 
i__, j; + static doublereal s, t; + static integer k2, n1, n2, jp, n1p1; + static doublereal eps, tau, tol; + static integer jlam, imax, jmax; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), dscal_( + integer *, doublereal *, doublereal *, integer *), dcopy_(integer + *, doublereal *, integer *, doublereal *, integer *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); /* -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University September 30, 1994 Purpose ======= - SLAEDA computes the Z vector corresponding to the merge step in the - CURLVLth step of the merge process with TLVLS steps for the CURPBMth - problem. + DLAED8 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny element in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. Arguments ========= + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + + K (output) INTEGER + The number of non-deflated eigenvalues, and the order of the + related secular equation. + N (input) INTEGER The dimension of the symmetric tridiagonal matrix. N >= 0. 
- TLVLS (input) INTEGER - The total number of merging levels in the overall divide and - conquer tree. + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - CURLVL (input) INTEGER - The current level in the overall merge routine, - 0 <= curlvl <= tlvls. + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the eigenvalues of the two submatrices to be + combined. On exit, the trailing (N-K) updated eigenvalues + (those which were deflated) sorted into increasing order. - CURPBM (input) INTEGER - The current problem in the current level in the overall - merge routine (counting from upper left to lower right). + Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) + If ICOMPQ = 0, Q is not referenced. Otherwise, + on entry, Q contains the eigenvectors of the partially solved + system which has been previously updated in matrix + multiplies with other partially solved eigensystems. + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. - PRMPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in PERM a - level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) - indicates the size of the permutation and incidentally the - size of the full, non-deflated problem. + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). - PERM (input) INTEGER array, dimension (N lg N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. + INDXQ (input) INTEGER array, dimension (N) + The permutation which separately sorts the two sub-problems + in D into ascending order. Note that elements in the second + half of this permutation must first have CUTPNT added to + their values in order to be accurate. 
+ + RHO (input/output) DOUBLE PRECISION + On entry, the off-diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. + On exit, RHO has been modified to the value required by + DLAED3. + + CUTPNT (input) INTEGER + The location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + Z (input) DOUBLE PRECISION array, dimension (N) + On entry, Z contains the updating vector (the last row of + the first sub-eigenvector matrix and the first row of the + second sub-eigenvector matrix). + On exit, the contents of Z are destroyed by the updating + process. + + DLAMDA (output) DOUBLE PRECISION array, dimension (N) + A copy of the first K eigenvalues which will be used by + DLAED3 to form the secular equation. + + Q2 (output) DOUBLE PRECISION array, dimension (LDQ2,N) + If ICOMPQ = 0, Q2 is not referenced. Otherwise, + a copy of the first K eigenvectors which will be used by + DLAED7 in a matrix multiply (DGEMM) to update the new + eigenvectors. + + LDQ2 (input) INTEGER + The leading dimension of the array Q2. LDQ2 >= max(1,N). + + W (output) DOUBLE PRECISION array, dimension (N) + The first k values of the final deflation-altered z-vector and + will be passed to DLAED3. + + PERM (output) INTEGER array, dimension (N) + The permutations (from deflation and sorting) to be applied + to each eigenblock. - GIVPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in GIVCOL a - level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) - indicates the number of Givens rotations. + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. - GIVCOL (input) INTEGER array, dimension (2, N lg N) + GIVCOL (output) INTEGER array, dimension (2, N) Each pair of numbers indicates a pair of columns to take place in a Givens rotation. 
- GIVNUM (input) REAL array, dimension (2, N lg N) + GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) Each number indicates the S value to be used in the corresponding Givens rotation. - Q (input) REAL array, dimension (N**2) - Contains the square eigenblocks from previous levels, the - starting positions for blocks are given by QPTR. - - QPTR (input) INTEGER array, dimension (N+2) - Contains a list of pointers which indicate where in Q an - eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates - the size of the block. - - Z (output) REAL array, dimension (N) - On output this vector contains the updating vector (the last - row of the first sub-eigenvector matrix and the first row of - the second sub-eigenvector matrix). + INDXP (workspace) INTEGER array, dimension (N) + The permutation used to place deflated values of D at the end + of the array. INDXP(1:K) points to the nondeflated D-values + and INDXP(K+1:N) points to the deflated eigenvalues. - ZTEMP (workspace) REAL array, dimension (N) + INDX (workspace) INTEGER array, dimension (N) + The permutation used to sort the contents of D into ascending + order. INFO (output) INTEGER = 0: successful exit. 
@@ -76519,25 +12105,42 @@ L120: */ /* Parameter adjustments */ - --ztemp; + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; --z__; - --qptr; - --q; - givnum -= 3; - givcol -= 3; - --givptr; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; --perm; - --prmptr; + givcol -= 3; + givnum -= 3; + --indxp; + --indx; /* Function Body */ *info = 0; - if (*n < 0) { + if (*icompq < 0 || *icompq > 1) { *info = -1; + } else if (*n < 0) { + *info = -3; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -10; + } else if (*ldq2 < max(1,*n)) { + *info = -14; } if (*info != 0) { i__1 = -(*info); - xerbla_("SLAEDA", &i__1); + xerbla_("DLAED8", &i__1); return 0; } @@ -76547,2680 +12150,2452 @@ L120: return 0; } -/* Determine location of first number in second half. */ - - mid = *n / 2 + 1; - -/* Gather last/first rows of appropriate eigenblocks into center of Z */ - - ptr = 1; - -/* - Determine location of lowest level subproblem in the full storage - scheme -*/ + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; - i__1 = *curlvl - 1; - curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; + if (*rho < 0.) { + dscal_(&n2, &c_b151, &z__[n1p1], &c__1); + } -/* - Determine size of these matrices. We add HALF to the value of - the SQRT in case the machine underestimates one of these square - roots. -*/ +/* Normalize z so that norm(z) = 1 */ - bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); - bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f); - i__1 = mid - bsiz1 - 1; - for (k = 1; k <= i__1; ++k) { - z__[k] = 0.f; + t = 1. 
/ sqrt(2.); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; /* L10: */ } - scopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & - c__1); - scopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); + dscal_(n, &t, &z__[1], &c__1); + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + i__1 = *n; - for (k = mid + bsiz2; k <= i__1; ++k) { - z__[k] = 0.f; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; /* L20: */ } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } -/* - Loop thru remaining levels 1 -> CURLVL applying the Givens - rotations and permutation and then multiplying the center matrices - against the current Z. -*/ +/* Calculate the allowable deflation tolerence */ - ptr = pow_ii(&c__2, tlvls) + 1; - i__1 = *curlvl - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = *curlvl - k; - i__3 = *curlvl - k - 1; - curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - - 1; - psiz1 = prmptr[curr + 1] - prmptr[curr]; - psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; - zptr1 = mid - psiz1; + imax = idamax_(n, &z__[1], &c__1); + jmax = idamax_(n, &d__[1], &c__1); + eps = EPSILON; + tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); -/* Apply Givens at CURR and CURR+1 */ +/* + If the rank-1 modifier is small enough, no more needs to be done + except to reorganize Q so that its columns correspond with the + elements in D. 
+*/ - i__2 = givptr[curr + 1] - 1; - for (i__ = givptr[curr]; i__ <= i__2; ++i__) { - srot_(&c__1, &z__[zptr1 + givcol[((i__) << (1)) + 1] - 1], &c__1, - &z__[zptr1 + givcol[((i__) << (1)) + 2] - 1], &c__1, & - givnum[((i__) << (1)) + 1], &givnum[((i__) << (1)) + 2]); -/* L30: */ - } - i__2 = givptr[curr + 2] - 1; - for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { - srot_(&c__1, &z__[mid - 1 + givcol[((i__) << (1)) + 1]], &c__1, & - z__[mid - 1 + givcol[((i__) << (1)) + 2]], &c__1, &givnum[ - ((i__) << (1)) + 1], &givnum[((i__) << (1)) + 2]); -/* L40: */ - } - psiz1 = prmptr[curr + 1] - prmptr[curr]; - psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; - i__2 = psiz1 - 1; - for (i__ = 0; i__ <= i__2; ++i__) { - ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; /* L50: */ - } - i__2 = psiz2 - 1; - for (i__ = 0; i__ <= i__2; ++i__) { - ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - - 1]; + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + + 1], &c__1); /* L60: */ + } + dlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); } + return 0; + } /* - Multiply Blocks at CURR and CURR+1 - - Determine size of these matrices. We add HALF to the value of - the SQRT in case the machine underestimates one of these - square roots. + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. 
*/ - bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); - bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + - .5f); - if (bsiz1 > 0) { - sgemv_("T", &bsiz1, &bsiz1, &c_b1011, &q[qptr[curr]], &bsiz1, & - ztemp[1], &c__1, &c_b320, &z__[zptr1], &c__1); - } - i__2 = psiz1 - bsiz1; - scopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); - if (bsiz2 > 0) { - sgemv_("T", &bsiz2, &bsiz2, &c_b1011, &q[qptr[curr + 1]], &bsiz2, - &ztemp[psiz1 + 1], &c__1, &c_b320, &z__[mid], &c__1); - } - i__2 = psiz2 - bsiz2; - scopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & - c__1); + *k = 0; + *givptr = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { - i__2 = *tlvls - k; - ptr += pow_ii(&c__2, &i__2); +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L110; + } + } else { + jlam = j; + goto L80; + } /* L70: */ } +L80: + ++j; + if (j > *n) { + goto L100; + } + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { - return 0; - -/* End of SLAEDA */ - -} /* slaeda_ */ - -/* Subroutine */ int slaev2_(real *a, real *b, real *c__, real *rt1, real * - rt2, real *cs1, real *sn1) -{ - /* System generated locals */ - real r__1; +/* Deflate due to small z component. */ - /* Builtin functions */ - double sqrt(doublereal); + --k2; + indxp[k2] = j; + } else { - /* Local variables */ - static real ab, df, cs, ct, tb, sm, tn, rt, adf, acs; - static integer sgn1, sgn2; - static real acmn, acmx; +/* Check if eigenvalues are close enough to allow deflation. */ + s = z__[jlam]; + c__ = z__[j]; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix - [ A B ] - [ B C ]. 
- On return, RT1 is the eigenvalue of larger absolute value, RT2 is the - eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right - eigenvector for RT1, giving the decomposition - - [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] - [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. - - Arguments - ========= - - A (input) REAL - The (1,1) element of the 2-by-2 matrix. - - B (input) REAL - The (1,2) element and the conjugate of the (2,1) element of - the 2-by-2 matrix. - - C (input) REAL - The (2,2) element of the 2-by-2 matrix. - - RT1 (output) REAL - The eigenvalue of larger absolute value. - - RT2 (output) REAL - The eigenvalue of smaller absolute value. - - CS1 (output) REAL - SN1 (output) REAL - The vector (CS1, SN1) is a unit right eigenvector for RT1. - - Further Details - =============== - - RT1 is accurate to a few ulps barring over/underflow. - - RT2 may be inaccurate if there is massive cancellation in the - determinant A*C-B*B; higher precision or correctly rounded or - correctly truncated arithmetic would be needed to compute RT2 - accurately in all cases. - - CS1 and SN1 are accurate to a few ulps barring over/underflow. + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ - Overflow is possible only if RT1 is within a factor of 5 of overflow. - Underflow is harmless if the input data is 0 or exceeds - underflow_threshold / macheps. + tau = dlapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { - ===================================================================== +/* Deflation is possible. 
*/ + z__[j] = tau; + z__[jlam] = 0.; - Compute the eigenvalues -*/ +/* Record the appropriate Givens rotation */ - sm = *a + *c__; - df = *a - *c__; - adf = dabs(df); - tb = *b + *b; - ab = dabs(tb); - if (dabs(*a) > dabs(*c__)) { - acmx = *a; - acmn = *c__; - } else { - acmx = *c__; - acmn = *a; + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + if (*icompq == 1) { + drot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ + indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + } + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } } - if (adf > ab) { -/* Computing 2nd power */ - r__1 = ab / adf; - rt = adf * sqrt(r__1 * r__1 + 1.f); - } else if (adf < ab) { -/* Computing 2nd power */ - r__1 = adf / ab; - rt = ab * sqrt(r__1 * r__1 + 1.f); - } else { - -/* Includes case AB=ADF=0 */ + goto L80; +L100: - rt = ab * sqrt(2.f); - } - if (sm < 0.f) { - *rt1 = (sm - rt) * .5f; - sgn1 = -1; +/* Record the last eigenvalue. */ -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else if (sm > 0.f) { - *rt1 = (sm + rt) * .5f; - sgn1 = 1; +L110: /* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. 
+ Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. */ - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; +/* L120: */ + } } else { - -/* Includes case RT1 = RT2 = 0 */ - - *rt1 = rt * .5f; - *rt2 = rt * -.5f; - sgn1 = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); +/* L130: */ + } } -/* Compute the eigenvector */ +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. +*/ - if (df >= 0.f) { - cs = df + rt; - sgn2 = 1; - } else { - cs = df - rt; - sgn2 = -1; - } - acs = dabs(cs); - if (acs > ab) { - ct = -tb / cs; - *sn1 = 1.f / sqrt(ct * ct + 1.f); - *cs1 = ct * *sn1; - } else { - if (ab == 0.f) { - *cs1 = 1.f; - *sn1 = 0.f; + if (*k < *n) { + if (*icompq == 0) { + i__1 = *n - *k; + dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); } else { - tn = -cs / tb; - *cs1 = 1.f / sqrt(tn * tn + 1.f); - *sn1 = tn * *cs1; + i__1 = *n - *k; + dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + dlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* + k + 1) * q_dim1 + 1], ldq); } } - if (sgn1 == sgn2) { - tn = *cs1; - *cs1 = -(*sn1); - *sn1 = tn; - } + return 0; -/* End of SLAEV2 */ +/* End of DLAED8 */ -} /* slaev2_ */ +} /* dlaed8_ */ -/* Subroutine */ int slahqr_(logical *wantt, logical *wantz, integer *n, - integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * - wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer * - info) +/* Subroutine */ int 
dlaed9_(integer *k, integer *kstart, integer *kstop, + integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * + rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, + integer *info) { /* System generated locals */ - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; - real r__1, r__2; + integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; + doublereal d__1; /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); + double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ - static integer i__, j, k, l, m; - static real s, v[3]; - static integer i1, i2; - static real t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, h33, - h44; - static integer nh; - static real cs; - static integer nr; - static real sn; - static integer nz; - static real ave, h33s, h44s; - static integer itn, its; - static real ulp, sum, tst1, h43h34, disc, unfl, ovfl, work[1]; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *), scopy_(integer *, real *, integer *, - real *, integer *), slanv2_(real *, real *, real *, real *, real * - , real *, real *, real *, real *, real *), slabad_(real *, real *) - ; - extern doublereal slamch_(char *); - extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, - real *); - extern doublereal slanhs_(char *, integer *, real *, integer *, real *); - static real smlnum; + static integer i__, j; + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlaed4_(integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 Purpose ======= - SLAHQR is an auxiliary routine called by SHSEQR to update the - eigenvalues and Schur decomposition already computed by SHSEQR, by - dealing with the Hessenberg submatrix in rows and columns ILO to IHI. + DLAED9 finds the roots of the secular equation, as defined by the + values in D, Z, and RHO, between KSTART and KSTOP. It makes the + appropriate calls to DLAED4 and then stores the new matrix of + eigenvectors for use in calculating the next level of Z vectors. Arguments ========= - WANTT (input) LOGICAL - = .TRUE. : the full Schur form T is required; - = .FALSE.: only eigenvalues are required. + K (input) INTEGER + The number of terms in the rational function to be solved by + DLAED4. K >= 0. - WANTZ (input) LOGICAL - = .TRUE. : the matrix of Schur vectors Z is required; - = .FALSE.: Schur vectors are not required. + KSTART (input) INTEGER + KSTOP (input) INTEGER + The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP + are to be computed. 1 <= KSTART <= KSTOP <= K. N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper quasi-triangular in - rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless - ILO = 1). SLAHQR works primarily with the Hessenberg - submatrix in rows and columns ILO to IHI, but applies - transformations to all of H if WANTT is .TRUE.. - 1 <= ILO <= max(1,IHI); IHI <= N. + The number of rows and columns in the Q matrix. + N >= K (delation may result in N > K). - H (input/output) REAL array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. 
- On exit, if WANTT is .TRUE., H is upper quasi-triangular in - rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in - standard form. If WANTT is .FALSE., the contents of H are - unspecified on exit. + D (output) DOUBLE PRECISION array, dimension (N) + D(I) contains the updated eigenvalues + for KSTART <= I <= KSTOP. - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). + Q (workspace) DOUBLE PRECISION array, dimension (LDQ,N) - WR (output) REAL array, dimension (N) - WI (output) REAL array, dimension (N) - The real and imaginary parts, respectively, of the computed - eigenvalues ILO to IHI are stored in the corresponding - elements of WR and WI. If two eigenvalues are computed as a - complex conjugate pair, they are stored in consecutive - elements of WR and WI, say the i-th and (i+1)th, with - WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the - eigenvalues are stored in the same order as on the diagonal - of the Schur form returned in H, with WR(i) = H(i,i), and, if - H(i:i+1,i:i+1) is a 2-by-2 diagonal block, - WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max( 1, N ). - ILOZ (input) INTEGER - IHIZ (input) INTEGER - Specify the rows of Z to which transformations must be - applied if WANTZ is .TRUE.. - 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. + RHO (input) DOUBLE PRECISION + The value of the parameter in the rank one update equation. + RHO >= 0 required. - Z (input/output) REAL array, dimension (LDZ,N) - If WANTZ is .TRUE., on entry Z must contain the current - matrix Z of transformations accumulated by SHSEQR, and on - exit Z has been updated; transformations are applied only to - the submatrix Z(ILOZ:IHIZ,ILO:IHI). - If WANTZ is .FALSE., Z is not referenced. + DLAMDA (input) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. 
- LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= max(1,N). + W (input) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating vector. + + S (output) DOUBLE PRECISION array, dimension (LDS, K) + Will contain the eigenvectors of the repaired matrix which + will be stored for subsequent Z vector calculation and + multiplied by the previously accumulated eigenvectors + to update the system. + + LDS (input) INTEGER + The leading dimension of S. LDS >= max( 1, K ). INFO (output) INTEGER - = 0: successful exit - > 0: SLAHQR failed to compute all the eigenvalues ILO to IHI - in a total of 30*(IHI-ILO+1) iterations; if INFO = i, - elements i+1:ihi of WR and WI contain those eigenvalues - which have been successfully computed. + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge Further Details =============== - 2-96 Based on modifications by - David Day, Sandia National Laboratory, USA + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA ===================================================================== -*/ + Test the input parameters. 
+*/ + /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --wr; - --wi; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --w; + s_dim1 = *lds; + s_offset = 1 + s_dim1; + s -= s_offset; /* Function Body */ *info = 0; -/* Quick return if possible */ - - if (*n == 0) { - return 0; + if (*k < 0) { + *info = -1; + } else if (*kstart < 1 || *kstart > max(1,*k)) { + *info = -2; + } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { + *info = -3; + } else if (*n < *k) { + *info = -4; + } else if (*ldq < max(1,*k)) { + *info = -7; + } else if (*lds < max(1,*k)) { + *info = -12; } - if (*ilo == *ihi) { - wr[*ilo] = h__[*ilo + *ilo * h_dim1]; - wi[*ilo] = 0.f; + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED9", &i__1); return 0; } - nh = *ihi - *ilo + 1; - nz = *ihiz - *iloz + 1; - -/* - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ +/* Quick return if possible */ - unfl = slamch_("Safe minimum"); - ovfl = 1.f / unfl; - slabad_(&unfl, &ovfl); - ulp = slamch_("Precision"); - smlnum = unfl * (nh / ulp); + if (*k == 0) { + return 0; + } /* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. + Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), + which on any of these machines zeros out the bottommost + bit of DLAMDA(I) if it is 1; this makes the subsequent + subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation + occurs. 
On binary machines with a guard digit (almost all + machines) it does not change DLAMDA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DLAMDA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. */ - if (*wantt) { - i1 = 1; - i2 = *n; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ } -/* ITN is the total number of QR iterations allowed. */ + i__1 = *kstop; + for (j = *kstart; j <= i__1; ++j) { + dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); - itn = nh * 30; +/* If the zero finder fails, the computation is terminated. */ -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of 1 or 2. Each iteration of the loop works - with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO or - H(L,L-1) is negligible so that the matrix splits. -*/ + if (*info != 0) { + goto L120; + } +/* L20: */ + } - i__ = *ihi; -L10: - l = *ilo; - if (i__ < *ilo) { - goto L150; + if (*k == 1 || *k == 2) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *k; + for (j = 1; j <= i__2; ++j) { + s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; +/* L30: */ + } +/* L40: */ + } + goto L120; } -/* - Perform QR iterations on rows and columns ILO to I until a - submatrix of order 1 or 2 splits off at the bottom because a - subdiagonal element has become negligible. -*/ +/* Compute updated W. */ - i__1 = itn; - for (its = 0; its <= i__1; ++its) { + dcopy_(k, &w[1], &c__1, &s[s_offset], &c__1); -/* Look for a single small subdiagonal element. 
*/ +/* Initialize W(I) = Q(I,I) */ - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 - = h__[k + k * h_dim1], dabs(r__2)); - if (tst1 == 0.f) { - i__3 = i__ - l + 1; - tst1 = slanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work); - } -/* Computing MAX */ - r__2 = ulp * tst1; - if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2, - smlnum)) { - goto L30; - } -/* L20: */ + i__1 = *ldq + 1; + dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ } -L30: - l = k; - if (l > *ilo) { + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L60: */ + } +/* L70: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = sqrt(-w[i__]); + w[i__] = d_sign(&d__1, &s[i__ + s_dim1]); +/* L80: */ + } -/* H(L,L-1) is negligible */ +/* Compute eigenvectors of the modified rank-1 modification. */ - h__[l + (l - 1) * h_dim1] = 0.f; + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; +/* L90: */ + } + temp = dnrm2_(k, &q[j * q_dim1 + 1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; +/* L100: */ } +/* L110: */ + } -/* Exit from loop if a submatrix of order 1 or 2 has split off. */ +L120: + return 0; - if (l >= i__ - 1) { - goto L140; - } +/* End of DLAED9 */ -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ +} /* dlaed9_ */ - if (! 
(*wantt)) { - i1 = l; - i2 = i__; - } +/* Subroutine */ int dlaeda_(integer *n, integer *tlvls, integer *curlvl, + integer *curpbm, integer *prmptr, integer *perm, integer *givptr, + integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, + doublereal *z__, doublereal *ztemp, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; - if ((its == 10) || (its == 20)) { + /* Builtin functions */ + integer pow_ii(integer *, integer *); + double sqrt(doublereal); -/* Exceptional shift. */ + /* Local variables */ + static integer i__, k, mid, ptr; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer curr, bsiz1, bsiz2, psiz1, psiz2, zptr1; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *), xerbla_(char *, + integer *); - s = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1)) + (r__2 = - h__[i__ - 1 + (i__ - 2) * h_dim1], dabs(r__2)); - h44 = s * .75f + h__[i__ + i__ * h_dim1]; - h33 = h44; - h43h34 = s * -.4375f * s; - } else { /* - Prepare to use Francis' double shift - (i.e. 2nd degree generalized Rayleigh quotient) -*/ + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 - h44 = h__[i__ + i__ * h_dim1]; - h33 = h__[i__ - 1 + (i__ - 1) * h_dim1]; - h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * - h_dim1]; - s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) * - h_dim1]; - disc = (h33 - h44) * .5f; - disc = disc * disc + h43h34; - if (disc > 0.f) { -/* Real roots: use Wilkinson's shift twice */ + Purpose + ======= - disc = sqrt(disc); - ave = (h33 + h44) * .5f; - if (dabs(h33) - dabs(h44) > 0.f) { - h33 = h33 * h44 - h43h34; - h44 = h33 / (r_sign(&disc, &ave) + ave); - } else { - h44 = r_sign(&disc, &ave) + ave; - } - h33 = h44; - h43h34 = 0.f; - } - } + DLAEDA computes the Z vector corresponding to the merge step in the + CURLVLth step of the merge process with TLVLS steps for the CURPBMth + problem. -/* Look for two consecutive small subdiagonal elements. */ + Arguments + ========= - i__2 = l; - for (m = i__ - 2; m >= i__2; --m) { -/* - Determine the effect of starting the double-shift QR - iteration at row M, and see if this would make H(M,M-1) - negligible. -*/ + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. - h11 = h__[m + m * h_dim1]; - h22 = h__[m + 1 + (m + 1) * h_dim1]; - h21 = h__[m + 1 + m * h_dim1]; - h12 = h__[m + (m + 1) * h_dim1]; - h44s = h44 - h11; - h33s = h33 - h11; - v1 = (h33s * h44s - h43h34) / h21 + h12; - v2 = h22 - h11 - h33s - h44s; - v3 = h__[m + 2 + (m + 1) * h_dim1]; - s = dabs(v1) + dabs(v2) + dabs(v3); - v1 /= s; - v2 /= s; - v3 /= s; - v[0] = v1; - v[1] = v2; - v[2] = v3; - if (m == l) { - goto L50; - } - h00 = h__[m - 1 + (m - 1) * h_dim1]; - h10 = h__[m + (m - 1) * h_dim1]; - tst1 = dabs(v1) * (dabs(h00) + dabs(h11) + dabs(h22)); - if (dabs(h10) * (dabs(v2) + dabs(v3)) <= ulp * tst1) { - goto L50; - } -/* L40: */ - } -L50: + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. 
-/* Double-shift QR step */ + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= curlvl <= tlvls. - i__2 = i__ - 1; - for (k = m; k <= i__2; ++k) { + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and incidentally the + size of the full, non-deflated problem. - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. - Computing MIN -*/ - i__3 = 3, i__4 = i__ - k + 1; - nr = min(i__3,i__4); - if (k > m) { - scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - slarfg_(&nr, v, &v[1], &c__1, &t1); - if (k > m) { - h__[k + (k - 1) * h_dim1] = v[0]; - h__[k + 1 + (k - 1) * h_dim1] = 0.f; - if (k < i__ - 1) { - h__[k + 2 + (k - 1) * h_dim1] = 0.f; - } - } else if (m > l) { - h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1]; - } - v2 = v[1]; - t2 = t1 * v2; - if (nr == 3) { - v3 = v[2]; - t3 = t1 * v3; + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. -/* - Apply G from the left to transform the rows of the matrix - in columns K to I2. 
-*/ + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. - i__3 = i2; - for (j = k; j <= i__3; ++j) { - sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] - + v3 * h__[k + 2 + j * h_dim1]; - h__[k + j * h_dim1] -= sum * t1; - h__[k + 1 + j * h_dim1] -= sum * t2; - h__[k + 2 + j * h_dim1] -= sum * t3; -/* L60: */ - } + GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+3,I). + Q (input) DOUBLE PRECISION array, dimension (N**2) + Contains the square eigenblocks from previous levels, the + starting positions for blocks are given by QPTR. - Computing MIN -*/ - i__4 = k + 3; - i__3 = min(i__4,i__); - for (j = i1; j <= i__3; ++j) { - sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] - + v3 * h__[j + (k + 2) * h_dim1]; - h__[j + k * h_dim1] -= sum * t1; - h__[j + (k + 1) * h_dim1] -= sum * t2; - h__[j + (k + 2) * h_dim1] -= sum * t3; -/* L70: */ - } + QPTR (input) INTEGER array, dimension (N+2) + Contains a list of pointers which indicate where in Q an + eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates + the size of the block. - if (*wantz) { + Z (output) DOUBLE PRECISION array, dimension (N) + On output this vector contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). 
-/* Accumulate transformations in the matrix Z */ + ZTEMP (workspace) DOUBLE PRECISION array, dimension (N) - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * - z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; - z__[j + k * z_dim1] -= sum * t1; - z__[j + (k + 1) * z_dim1] -= sum * t2; - z__[j + (k + 2) * z_dim1] -= sum * t3; -/* L80: */ - } - } - } else if (nr == 2) { + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. -/* - Apply G from the left to transform the rows of the matrix - in columns K to I2. -*/ + Further Details + =============== - i__3 = i2; - for (j = k; j <= i__3; ++j) { - sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; - h__[k + j * h_dim1] -= sum * t1; - h__[k + 1 + j * h_dim1] -= sum * t2; -/* L90: */ - } + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+3,I). + ===================================================================== + + + Test the input parameters. 
*/ - i__3 = i__; - for (j = i1; j <= i__3; ++j) { - sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] - ; - h__[j + k * h_dim1] -= sum * t1; - h__[j + (k + 1) * h_dim1] -= sum * t2; -/* L100: */ - } + /* Parameter adjustments */ + --ztemp; + --z__; + --qptr; + --q; + givnum -= 3; + givcol -= 3; + --givptr; + --perm; + --prmptr; - if (*wantz) { + /* Function Body */ + *info = 0; -/* Accumulate transformations in the matrix Z */ + if (*n < 0) { + *info = -1; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAEDA", &i__1); + return 0; + } - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * - z_dim1]; - z__[j + k * z_dim1] -= sum * t1; - z__[j + (k + 1) * z_dim1] -= sum * t2; -/* L110: */ - } - } - } -/* L120: */ - } +/* Quick return if possible */ -/* L130: */ + if (*n == 0) { + return 0; } -/* Failure to converge in remaining number of iterations */ +/* Determine location of first number in second half. */ - *info = i__; - return 0; + mid = *n / 2 + 1; -L140: +/* Gather last/first rows of appropriate eigenblocks into center of Z */ + + ptr = 1; + +/* + Determine location of lowest level subproblem in the full storage + scheme +*/ - if (l == i__) { + i__1 = *curlvl - 1; + curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; -/* H(I,I-1) is negligible: one eigenvalue has converged. */ +/* + Determine size of these matrices. We add HALF to the value of + the SQRT in case the machine underestimates one of these square + roots. 
+*/ - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.f; - } else if (l == i__ - 1) { + bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + .5); + bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])) + + .5); + i__1 = mid - bsiz1 - 1; + for (k = 1; k <= i__1; ++k) { + z__[k] = 0.; +/* L10: */ + } + dcopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & + c__1); + dcopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); + i__1 = *n; + for (k = mid + bsiz2; k <= i__1; ++k) { + z__[k] = 0.; +/* L20: */ + } /* - H(I-1,I-2) is negligible: a pair of eigenvalues have converged. - - Transform the 2-by-2 submatrix to standard Schur form, - and compute and store the eigenvalues. + Loop thru remaining levels 1 -> CURLVL applying the Givens + rotations and permutation and then multiplying the center matrices + against the current Z. */ - slanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * - h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * - h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, - &sn); - - if (*wantt) { + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = *curlvl - k; + i__3 = *curlvl - k - 1; + curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - + 1; + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + zptr1 = mid - psiz1; -/* Apply the transformation to the rest of H. 
*/ +/* Apply Givens at CURR and CURR+1 */ - if (i2 > i__) { - i__1 = i2 - i__; - srot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ - i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); - } - i__1 = i__ - i1 - 1; - srot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * - h_dim1], &c__1, &cs, &sn); + i__2 = givptr[curr + 1] - 1; + for (i__ = givptr[curr]; i__ <= i__2; ++i__) { + drot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & + z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( + i__ << 1) + 1], &givnum[(i__ << 1) + 2]); +/* L30: */ } - if (*wantz) { - -/* Apply the transformation to Z. */ - - srot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + - i__ * z_dim1], &c__1, &cs, &sn); + i__2 = givptr[curr + 2] - 1; + for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { + drot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ + mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << + 1) + 1], &givnum[(i__ << 1) + 2]); +/* L40: */ + } + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + i__2 = psiz1 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; +/* L50: */ + } + i__2 = psiz2 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - + 1]; +/* L60: */ } - } /* - Decrement number of remaining iterations, and return to start of - the main loop with new value of I. + Multiply Blocks at CURR and CURR+1 + + Determine size of these matrices. We add HALF to the value of + the SQRT in case the machine underestimates one of these + square roots. 
*/ - itn -= its; - i__ = l - 1; - goto L10; + bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + + .5); + bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1]) + ) + .5); + if (bsiz1 > 0) { + dgemv_("T", &bsiz1, &bsiz1, &c_b15, &q[qptr[curr]], &bsiz1, & + ztemp[1], &c__1, &c_b29, &z__[zptr1], &c__1); + } + i__2 = psiz1 - bsiz1; + dcopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); + if (bsiz2 > 0) { + dgemv_("T", &bsiz2, &bsiz2, &c_b15, &q[qptr[curr + 1]], &bsiz2, & + ztemp[psiz1 + 1], &c__1, &c_b29, &z__[mid], &c__1); + } + i__2 = psiz2 - bsiz2; + dcopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & + c__1); + + i__2 = *tlvls - k; + ptr += pow_ii(&c__2, &i__2); +/* L70: */ + } -L150: return 0; -/* End of SLAHQR */ +/* End of DLAEDA */ -} /* slahqr_ */ +} /* dlaeda_ */ -/* Subroutine */ int slahrd_(integer *n, integer *k, integer *nb, real *a, - integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy) +/* Subroutine */ int dlaev2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1) { /* System generated locals */ - integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, - i__3; - real r__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); /* Local variables */ - static integer i__; - static real ei; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - sgemv_(char *, integer *, integer *, real *, real *, integer *, - real *, integer *, real *, real *, integer *), scopy_( - integer *, real *, integer *, real *, integer *), saxpy_(integer * - , real *, real *, integer *, real *, integer *), strmv_(char *, - char *, char *, integer *, real *, integer *, real *, integer *), slarfg_(integer *, real *, real *, - integer *, real *); + static doublereal ab, df, cs, ct, tb, sm, tn, rt, adf, acs; + static integer sgn1, sgn2; + static doublereal acmn, acmx; /* -- 
LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + October 31, 1992 Purpose ======= - SLAHRD reduces the first NB columns of a real general n-by-(n-k+1) - matrix A so that elements below the k-th subdiagonal are zero. The - reduction is performed by an orthogonal similarity transformation - Q' * A * Q. The routine returns the matrices V and T which determine - Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. + DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix + [ A B ] + [ B C ]. + On return, RT1 is the eigenvalue of larger absolute value, RT2 is the + eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right + eigenvector for RT1, giving the decomposition - This is an auxiliary routine called by SGEHRD. + [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] + [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. Arguments ========= - N (input) INTEGER - The order of the matrix A. - - K (input) INTEGER - The offset for the reduction. Elements below the k-th - subdiagonal in the first NB columns are reduced to zero. - - NB (input) INTEGER - The number of columns to be reduced. - - A (input/output) REAL array, dimension (LDA,N-K+1) - On entry, the n-by-(n-k+1) general matrix A. - On exit, the elements on and above the k-th subdiagonal in - the first NB columns are overwritten with the corresponding - elements of the reduced matrix; the elements below the k-th - subdiagonal, with the array TAU, represent the matrix Q as a - product of elementary reflectors. The other columns of A are - unchanged. See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). + A (input) DOUBLE PRECISION + The (1,1) element of the 2-by-2 matrix. - TAU (output) REAL array, dimension (NB) - The scalar factors of the elementary reflectors. See Further - Details. 
+ B (input) DOUBLE PRECISION + The (1,2) element and the conjugate of the (2,1) element of + the 2-by-2 matrix. - T (output) REAL array, dimension (LDT,NB) - The upper triangular matrix T. + C (input) DOUBLE PRECISION + The (2,2) element of the 2-by-2 matrix. - LDT (input) INTEGER - The leading dimension of the array T. LDT >= NB. + RT1 (output) DOUBLE PRECISION + The eigenvalue of larger absolute value. - Y (output) REAL array, dimension (LDY,NB) - The n-by-nb matrix Y. + RT2 (output) DOUBLE PRECISION + The eigenvalue of smaller absolute value. - LDY (input) INTEGER - The leading dimension of the array Y. LDY >= N. + CS1 (output) DOUBLE PRECISION + SN1 (output) DOUBLE PRECISION + The vector (CS1, SN1) is a unit right eigenvector for RT1. Further Details =============== - The matrix Q is represented as a product of nb elementary reflectors - - Q = H(1) H(2) . . . H(nb). - - Each H(i) has the form - - H(i) = I - tau * v * v' - - where tau is a real scalar, and v is a real vector with - v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in - A(i+k+1:n,i), and tau in TAU(i). - - The elements of the vectors v together form the (n-k+1)-by-nb matrix - V which is needed, with T and Y, to apply the transformation to the - unreduced part of the matrix, using an update of the form: - A := (I - V*T*V') * (A - Y*V'). + RT1 is accurate to a few ulps barring over/underflow. - The contents of A on exit are illustrated by the following example - with n = 7, k = 3 and nb = 2: + RT2 may be inaccurate if there is massive cancellation in the + determinant A*C-B*B; higher precision or correctly rounded or + correctly truncated arithmetic would be needed to compute RT2 + accurately in all cases. - ( a h a a a ) - ( a h a a a ) - ( a h a a a ) - ( h h a a a ) - ( v1 h a a a ) - ( v1 v2 a a a ) - ( v1 v2 a a a ) + CS1 and SN1 are accurate to a few ulps barring over/underflow. 
- where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). + Overflow is possible only if RT1 is within a factor of 5 of overflow. + Underflow is harmless if the input data is 0 or exceeds + underflow_threshold / macheps. - ===================================================================== + ===================================================================== - Quick return if possible + Compute the eigenvalues */ - /* Parameter adjustments */ - --tau; - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; - - /* Function Body */ - if (*n <= 1) { - return 0; + sm = *a + *c__; + df = *a - *c__; + adf = abs(df); + tb = *b + *b; + ab = abs(tb); + if (abs(*a) > abs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; } + if (adf > ab) { +/* Computing 2nd power */ + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); + } else if (adf < ab) { +/* Computing 2nd power */ + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); + } else { - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ > 1) { - -/* - Update A(1:n,i) - - Compute i-th column of A - Y * V' -*/ +/* Includes case AB=ADF=0 */ - i__2 = i__ - 1; - sgemv_("No transpose", n, &i__2, &c_b1290, &y[y_offset], ldy, &a[* - k + i__ - 1 + a_dim1], lda, &c_b1011, &a[i__ * a_dim1 + 1] - , &c__1); + rt = ab * sqrt(2.); + } + if (sm < 0.) { + *rt1 = (sm - rt) * .5; + sgn1 = -1; /* - Apply I - V * T' * V' to this column (call it b) from the - left, using the last column of T as workspace - - Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) - ( V2 ) ( b2 ) - - where V1 is unit lower triangular - - w := V1' * b1 + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. 
*/ - i__2 = i__ - 1; - scopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + - 1], &c__1); - i__2 = i__ - 1; - strmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], - lda, &t[*nb * t_dim1 + 1], &c__1); - -/* w := w + V2'*b2 */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[*k + i__ + a_dim1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b1011, &t[* - nb * t_dim1 + 1], &c__1); - -/* w := T'*w */ - - i__2 = i__ - 1; - strmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, - &t[*nb * t_dim1 + 1], &c__1); - -/* b2 := b2 - V2*w */ - - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[*k + i__ + - a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b1011, &a[* - k + i__ + i__ * a_dim1], &c__1); - -/* b1 := b1 - V1*w */ - - i__2 = i__ - 1; - strmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] - , lda, &t[*nb * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - saxpy_(&i__2, &c_b1290, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + - i__ * a_dim1], &c__1); - - a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; - } + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; + sgn1 = 1; /* - Generate the elementary reflector H(i) to annihilate - A(k+i+1:n,i) + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. 
*/ - i__2 = *n - *k - i__ + 1; -/* Computing MIN */ - i__3 = *k + i__ + 1; - slarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * - a_dim1], &c__1, &tau[i__]); - ei = a[*k + i__ + i__ * a_dim1]; - a[*k + i__ + i__ * a_dim1] = 1.f; - -/* Compute Y(1:n,i) */ + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { - i__2 = *n - *k - i__ + 1; - sgemv_("No transpose", n, &i__2, &c_b1011, &a[(i__ + 1) * a_dim1 + 1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b320, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[*k + i__ + a_dim1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b320, &t[i__ * - t_dim1 + 1], &c__1); - i__2 = i__ - 1; - sgemv_("No transpose", n, &i__2, &c_b1290, &y[y_offset], ldy, &t[i__ * - t_dim1 + 1], &c__1, &c_b1011, &y[i__ * y_dim1 + 1], &c__1); - sscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); +/* Includes case RT1 = RT2 = 0 */ -/* Compute T(1:i,i) */ + *rt1 = rt * .5; + *rt2 = rt * -.5; + sgn1 = 1; + } - i__2 = i__ - 1; - r__1 = -tau[i__]; - sscal_(&i__2, &r__1, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, - &t[i__ * t_dim1 + 1], &c__1) - ; - t[i__ + i__ * t_dim1] = tau[i__]; +/* Compute the eigenvector */ -/* L10: */ + if (df >= 0.) { + cs = df + rt; + sgn2 = 1; + } else { + cs = df - rt; + sgn2 = -1; + } + acs = abs(cs); + if (acs > ab) { + ct = -tb / cs; + *sn1 = 1. / sqrt(ct * ct + 1.); + *cs1 = ct * *sn1; + } else { + if (ab == 0.) { + *cs1 = 1.; + *sn1 = 0.; + } else { + tn = -cs / tb; + *cs1 = 1. 
/ sqrt(tn * tn + 1.); + *sn1 = tn * *cs1; + } + } + if (sgn1 == sgn2) { + tn = *cs1; + *cs1 = -(*sn1); + *sn1 = tn; } - a[*k + *nb + *nb * a_dim1] = ei; - return 0; -/* End of SLAHRD */ +/* End of DLAEV2 */ -} /* slahrd_ */ +} /* dlaev2_ */ -/* Subroutine */ int slaln2_(logical *ltrans, integer *na, integer *nw, real * - smin, real *ca, real *a, integer *lda, real *d1, real *d2, real *b, - integer *ldb, real *wr, real *wi, real *x, integer *ldx, real *scale, - real *xnorm, integer *info) +/* Subroutine */ int dlahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, integer *info) { - /* Initialized data */ - - static logical cswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; - static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; - static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, - 4,3,2,1 }; - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; - real r__1, r__2, r__3, r__4, r__5, r__6; - static real equiv_0[4], equiv_1[4]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ - static integer j; -#define ci (equiv_0) -#define cr (equiv_1) - static real bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, cr21, - cr22, li21, csi, ui11, lr21, ui12, ui22; -#define civ (equiv_0) - static real csr, ur11, ur12, ur22; -#define crv (equiv_1) - static real bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; - static integer icmax; - static real bnorm, cnorm, smini; - extern doublereal slamch_(char *); - static real bignum; - extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real * - , real *); - static real smlnum; + static integer i__, j, k, l, m; + static doublereal s, v[3]; + static integer 
i1, i2; + static doublereal t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, + h33, h44; + static integer nh; + static doublereal cs; + static integer nr; + static doublereal sn; + static integer nz; + static doublereal ave, h33s, h44s; + static integer itn, its; + static doublereal ulp, sum, tst1, h43h34, disc, unfl, ovfl; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static doublereal work[1]; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), dlabad_( + doublereal *, doublereal *); + + extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *); + extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, + doublereal *); + static doublereal smlnum; /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + June 30, 1999 Purpose ======= - SLALN2 solves a system of the form (ca A - w D ) X = s B - or (ca A' - w D) X = s B with possible scaling ("s") and - perturbation of A. (A' means A-transpose.) - - A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA - real diagonal matrix, w is a real or complex value, and X and B are - NA x 1 matrices -- real if w is real, complex if w is complex. NA - may be 1 or 2. - - If w is complex, X and B are represented as NA x 2 matrices, - the first column of each being the real part and the second - being the imaginary part. - - "s" is a scaling factor (.LE. 1), computed by SLALN2, which is - so chosen that X can be computed without overflow. X is further - scaled if necessary to assure that norm(ca A - w D)*norm(X) is less - than overflow. 
- - If both singular values of (ca A - w D) are less than SMIN, - SMIN*identity will be used instead of (ca A - w D). If only one - singular value is less than SMIN, one element of (ca A - w D) will be - perturbed enough to make the smallest singular value roughly SMIN. - If both singular values are at least SMIN, (ca A - w D) will not be - perturbed. In any case, the perturbation will be at most some small - multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values - are computed by infinity-norm approximations, and thus will only be - correct to a factor of 2 or so. - - Note: all input quantities are assumed to be smaller than overflow - by a reasonable factor. (See BIGNUM.) + DLAHQR is an auxiliary routine called by DHSEQR to update the + eigenvalues and Schur decomposition already computed by DHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to IHI. Arguments - ========== - - LTRANS (input) LOGICAL - =.TRUE.: A-transpose will be used. - =.FALSE.: A will be used (not transposed.) - - NA (input) INTEGER - The size of the matrix A. It may (only) be 1 or 2. - - NW (input) INTEGER - 1 if "w" is real, 2 if "w" is complex. It may only be 1 - or 2. - - SMIN (input) REAL - The desired lower bound on the singular values of A. This - should be a safe distance away from underflow or overflow, - say, between (underflow/machine precision) and (machine - precision * overflow ). (See BIGNUM and ULP.) - - CA (input) REAL - The coefficient c, which A is multiplied by. + ========= - A (input) REAL array, dimension (LDA,NA) - The NA x NA matrix A. + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. - LDA (input) INTEGER - The leading dimension of A. It must be at least NA. + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. - D1 (input) REAL - The 1,1 element in the diagonal matrix D. 
+ N (input) INTEGER + The order of the matrix H. N >= 0. - D2 (input) REAL - The 2,2 element in the diagonal matrix D. Not used if NW=1. + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper quasi-triangular in + rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless + ILO = 1). DLAHQR works primarily with the Hessenberg + submatrix in rows and columns ILO to IHI, but applies + transformations to all of H if WANTT is .TRUE.. + 1 <= ILO <= max(1,IHI); IHI <= N. - B (input) REAL array, dimension (LDB,NW) - The NA x NW matrix B (right-hand side). If NW=2 ("w" is - complex), column 1 contains the real part of B and column 2 - contains the imaginary part. + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if WANTT is .TRUE., H is upper quasi-triangular in + rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in + standard form. If WANTT is .FALSE., the contents of H are + unspecified on exit. - LDB (input) INTEGER - The leading dimension of B. It must be at least NA. + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). - WR (input) REAL - The real part of the scalar "w". + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues ILO to IHI are stored in the corresponding + elements of WR and WI. If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the + eigenvalues are stored in the same order as on the diagonal + of the Schur form returned in H, with WR(i) = H(i,i), and, if + H(i:i+1,i:i+1) is a 2-by-2 diagonal block, + WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). - WI (input) REAL - The imaginary part of the scalar "w". Not used if NW=1. 
+ ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. - X (output) REAL array, dimension (LDX,NW) - The NA x NW matrix X (unknowns), as computed by SLALN2. - If NW=2 ("w" is complex), on exit, column 1 will contain - the real part of X and column 2 will contain the imaginary - part. + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + If WANTZ is .TRUE., on entry Z must contain the current + matrix Z of transformations accumulated by DHSEQR, and on + exit Z has been updated; transformations are applied only to + the submatrix Z(ILOZ:IHIZ,ILO:IHI). + If WANTZ is .FALSE., Z is not referenced. - LDX (input) INTEGER - The leading dimension of X. It must be at least NA. + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= max(1,N). - SCALE (output) REAL - The scale factor that B must be multiplied by to insure - that overflow does not occur when computing X. Thus, - (ca A - w D) X will be SCALE*B, not B (ignoring - perturbations of A.) It will be at most 1. + INFO (output) INTEGER + = 0: successful exit + > 0: DLAHQR failed to compute all the eigenvalues ILO to IHI + in a total of 30*(IHI-ILO+1) iterations; if INFO = i, + elements i+1:ihi of WR and WI contain those eigenvalues + which have been successfully computed. - XNORM (output) REAL - The infinity-norm of X, when X is regarded as an NA x NW - real matrix. + Further Details + =============== - INFO (output) INTEGER - An error flag. It will be set to zero if no error occurs, - a negative number if an argument is in error, or a positive - number if ca A - w D had to be perturbed. - The possible values are: - = 0: No error occurred, and (ca A - w D) did not have to be - perturbed. - = 1: (ca A - w D) had to be perturbed to make its smallest - (or only) singular value greater than SMIN. - NOTE: In the interests of speed, this routine does not - check the inputs for errors. 
+ 2-96 Based on modifications by + David Day, Sandia National Laboratory, USA - ===================================================================== + ===================================================================== */ + /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - x_dim1 = *ldx; - x_offset = 1 + x_dim1; - x -= x_offset; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; /* Function Body */ - -/* Compute BIGNUM */ - - smlnum = 2.f * slamch_("Safe minimum"); - bignum = 1.f / smlnum; - smini = dmax(*smin,smlnum); - -/* Don't check for input errors */ - *info = 0; -/* Standard Initializations */ - - *scale = 1.f; - - if (*na == 1) { +/* Quick return if possible */ -/* 1 x 1 (i.e., scalar) system C X = B */ + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } - if (*nw == 1) { + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; /* - Real 1x1 system. - - C = ca A - w D + Set machine-dependent constants for the stopping criterion. + If norm(H) <= sqrt(OVFL), overflow should not occur. */ - csr = *ca * a[a_dim1 + 1] - *wr * *d1; - cnorm = dabs(csr); + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; + smlnum = unfl * (nh / ulp); -/* If | C | < SMINI, use C = SMINI */ +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ - if (cnorm < smini) { - csr = smini; - cnorm = smini; - *info = 1; - } + if (*wantt) { + i1 = 1; + i2 = *n; + } -/* Check scaling for X = B / C */ +/* ITN is the total number of QR iterations allowed. 
*/ - bnorm = (r__1 = b[b_dim1 + 1], dabs(r__1)); - if (cnorm < 1.f && bnorm > 1.f) { - if (bnorm > bignum * cnorm) { - *scale = 1.f / bnorm; - } - } + itn = nh * 30; -/* Compute X */ +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of 1 or 2. Each iteration of the loop works + with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO or + H(L,L-1) is negligible so that the matrix splits. +*/ - x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; - *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)); - } else { + i__ = *ihi; +L10: + l = *ilo; + if (i__ < *ilo) { + goto L150; + } /* - Complex 1x1 system (w is complex) - - C = ca A - w D + Perform QR iterations on rows and columns ILO to I until a + submatrix of order 1 or 2 splits off at the bottom because a + subdiagonal element has become negligible. */ - csr = *ca * a[a_dim1 + 1] - *wr * *d1; - csi = -(*wi) * *d1; - cnorm = dabs(csr) + dabs(csi); + i__1 = itn; + for (its = 0; its <= i__1; ++its) { -/* If | C | < SMINI, use C = SMINI */ +/* Look for a single small subdiagonal element. */ - if (cnorm < smini) { - csr = smini; - csi = 0.f; - cnorm = smini; - *info = 1; + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[k + k * h_dim1], abs(d__2)); + if (tst1 == 0.) 
{ + i__3 = i__ - l + 1; + tst1 = dlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work); } - -/* Check scaling for X = B / C */ - - bnorm = (r__1 = b[b_dim1 + 1], dabs(r__1)) + (r__2 = b[((b_dim1) - << (1)) + 1], dabs(r__2)); - if (cnorm < 1.f && bnorm > 1.f) { - if (bnorm > bignum * cnorm) { - *scale = 1.f / bnorm; - } +/* Computing MAX */ + d__2 = ulp * tst1; + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, + smlnum)) { + goto L30; } +/* L20: */ + } +L30: + l = k; + if (l > *ilo) { -/* Compute X */ +/* H(L,L-1) is negligible */ - r__1 = *scale * b[b_dim1 + 1]; - r__2 = *scale * b[((b_dim1) << (1)) + 1]; - sladiv_(&r__1, &r__2, &csr, &csi, &x[x_dim1 + 1], &x[((x_dim1) << - (1)) + 1]); - *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)) + (r__2 = x[((x_dim1) - << (1)) + 1], dabs(r__2)); + h__[l + (l - 1) * h_dim1] = 0.; } - } else { +/* Exit from loop if a submatrix of order 1 or 2 has split off. */ -/* - 2x2 System + if (l >= i__ - 1) { + goto L140; + } - Compute the real part of C = ca A - w D (or ca A' - w D ) +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. */ - cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; - cr[3] = *ca * a[((a_dim1) << (1)) + 2] - *wr * *d2; - if (*ltrans) { - cr[2] = *ca * a[a_dim1 + 2]; - cr[1] = *ca * a[((a_dim1) << (1)) + 1]; - } else { - cr[1] = *ca * a[a_dim1 + 2]; - cr[2] = *ca * a[((a_dim1) << (1)) + 1]; + if (! (*wantt)) { + i1 = l; + i2 = i__; } - if (*nw == 1) { + if (its == 10 || its == 20) { -/* - Real 2x2 system (w is real) +/* Exceptional shift. */ - Find the largest element in C -*/ + s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + h44 = s * .75 + h__[i__ + i__ * h_dim1]; + h33 = h44; + h43h34 = s * -.4375 * s; + } else { - cmax = 0.f; - icmax = 0; +/* + Prepare to use Francis' double shift + (i.e. 
2nd degree generalized Rayleigh quotient) +*/ - for (j = 1; j <= 4; ++j) { - if ((r__1 = crv[j - 1], dabs(r__1)) > cmax) { - cmax = (r__1 = crv[j - 1], dabs(r__1)); - icmax = j; - } -/* L10: */ - } + h44 = h__[i__ + i__ * h_dim1]; + h33 = h__[i__ - 1 + (i__ - 1) * h_dim1]; + h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * + h_dim1]; + s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) * + h_dim1]; + disc = (h33 - h44) * .5; + disc = disc * disc + h43h34; + if (disc > 0.) { -/* If norm(C) < SMINI, use SMINI*identity. */ +/* Real roots: use Wilkinson's shift twice */ - if (cmax < smini) { -/* Computing MAX */ - r__3 = (r__1 = b[b_dim1 + 1], dabs(r__1)), r__4 = (r__2 = b[ - b_dim1 + 2], dabs(r__2)); - bnorm = dmax(r__3,r__4); - if (smini < 1.f && bnorm > 1.f) { - if (bnorm > bignum * smini) { - *scale = 1.f / bnorm; - } + disc = sqrt(disc); + ave = (h33 + h44) * .5; + if (abs(h33) - abs(h44) > 0.) { + h33 = h33 * h44 - h43h34; + h44 = h33 / (d_sign(&disc, &ave) + ave); + } else { + h44 = d_sign(&disc, &ave) + ave; } - temp = *scale / smini; - x[x_dim1 + 1] = temp * b[b_dim1 + 1]; - x[x_dim1 + 2] = temp * b[b_dim1 + 2]; - *xnorm = temp * bnorm; - *info = 1; - return 0; + h33 = h44; + h43h34 = 0.; } + } -/* Gaussian elimination with complete pivoting. */ - - ur11 = crv[icmax - 1]; - cr21 = crv[ipivot[((icmax) << (2)) - 3] - 1]; - ur12 = crv[ipivot[((icmax) << (2)) - 2] - 1]; - cr22 = crv[ipivot[((icmax) << (2)) - 1] - 1]; - ur11r = 1.f / ur11; - lr21 = ur11r * cr21; - ur22 = cr22 - ur12 * lr21; +/* Look for two consecutive small subdiagonal elements. */ -/* If smaller pivot < SMINI, use SMINI */ + i__2 = l; + for (m = i__ - 2; m >= i__2; --m) { +/* + Determine the effect of starting the double-shift QR + iteration at row M, and see if this would make H(M,M-1) + negligible. 
+*/ - if (dabs(ur22) < smini) { - ur22 = smini; - *info = 1; - } - if (rswap[icmax - 1]) { - br1 = b[b_dim1 + 2]; - br2 = b[b_dim1 + 1]; - } else { - br1 = b[b_dim1 + 1]; - br2 = b[b_dim1 + 2]; + h11 = h__[m + m * h_dim1]; + h22 = h__[m + 1 + (m + 1) * h_dim1]; + h21 = h__[m + 1 + m * h_dim1]; + h12 = h__[m + (m + 1) * h_dim1]; + h44s = h44 - h11; + h33s = h33 - h11; + v1 = (h33s * h44s - h43h34) / h21 + h12; + v2 = h22 - h11 - h33s - h44s; + v3 = h__[m + 2 + (m + 1) * h_dim1]; + s = abs(v1) + abs(v2) + abs(v3); + v1 /= s; + v2 /= s; + v3 /= s; + v[0] = v1; + v[1] = v2; + v[2] = v3; + if (m == l) { + goto L50; } - br2 -= lr21 * br1; -/* Computing MAX */ - r__2 = (r__1 = br1 * (ur22 * ur11r), dabs(r__1)), r__3 = dabs(br2) - ; - bbnd = dmax(r__2,r__3); - if (bbnd > 1.f && dabs(ur22) < 1.f) { - if (bbnd >= bignum * dabs(ur22)) { - *scale = 1.f / bbnd; - } + h00 = h__[m - 1 + (m - 1) * h_dim1]; + h10 = h__[m + (m - 1) * h_dim1]; + tst1 = abs(v1) * (abs(h00) + abs(h11) + abs(h22)); + if (abs(h10) * (abs(v2) + abs(v3)) <= ulp * tst1) { + goto L50; } +/* L40: */ + } +L50: - xr2 = br2 * *scale / ur22; - xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); - if (cswap[icmax - 1]) { - x[x_dim1 + 1] = xr2; - x[x_dim1 + 2] = xr1; - } else { - x[x_dim1 + 1] = xr1; - x[x_dim1 + 2] = xr2; - } -/* Computing MAX */ - r__1 = dabs(xr1), r__2 = dabs(xr2); - *xnorm = dmax(r__1,r__2); +/* Double-shift QR step */ -/* Further scaling if norm(A) norm(X) > overflow */ + i__2 = i__ - 1; + for (k = m; k <= i__2; ++k) { - if (*xnorm > 1.f && cmax > 1.f) { - if (*xnorm > bignum / cmax) { - temp = cmax / bignum; - x[x_dim1 + 1] = temp * x[x_dim1 + 1]; - x[x_dim1 + 2] = temp * x[x_dim1 + 2]; - *xnorm = temp * *xnorm; - *scale = temp * *scale; +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. 
+ + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. + + Computing MIN +*/ + i__3 = 3, i__4 = i__ - k + 1; + nr = min(i__3,i__4); + if (k > m) { + dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + dlarfg_(&nr, v, &v[1], &c__1, &t1); + if (k > m) { + h__[k + (k - 1) * h_dim1] = v[0]; + h__[k + 1 + (k - 1) * h_dim1] = 0.; + if (k < i__ - 1) { + h__[k + 2 + (k - 1) * h_dim1] = 0.; } + } else if (m > l) { + h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1]; } - } else { + v2 = v[1]; + t2 = t1 * v2; + if (nr == 3) { + v3 = v[2]; + t3 = t1 * v3; /* - Complex 2x2 system (w is complex) - - Find the largest element in C + Apply G from the left to transform the rows of the matrix + in columns K to I2. */ - ci[0] = -(*wi) * *d1; - ci[1] = 0.f; - ci[2] = 0.f; - ci[3] = -(*wi) * *d2; - cmax = 0.f; - icmax = 0; - - for (j = 1; j <= 4; ++j) { - if ((r__1 = crv[j - 1], dabs(r__1)) + (r__2 = civ[j - 1], - dabs(r__2)) > cmax) { - cmax = (r__1 = crv[j - 1], dabs(r__1)) + (r__2 = civ[j - - 1], dabs(r__2)); - icmax = j; + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + + v3 * h__[k + 2 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + h__[k + 2 + j * h_dim1] -= sum * t3; +/* L60: */ } -/* L20: */ - } -/* If norm(C) < SMINI, use SMINI*identity. */ +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). 
- if (cmax < smini) { -/* Computing MAX */ - r__5 = (r__1 = b[b_dim1 + 1], dabs(r__1)) + (r__2 = b[(( - b_dim1) << (1)) + 1], dabs(r__2)), r__6 = (r__3 = b[ - b_dim1 + 2], dabs(r__3)) + (r__4 = b[((b_dim1) << (1)) - + 2], dabs(r__4)); - bnorm = dmax(r__5,r__6); - if (smini < 1.f && bnorm > 1.f) { - if (bnorm > bignum * smini) { - *scale = 1.f / bnorm; - } + Computing MIN +*/ + i__4 = k + 3; + i__3 = min(i__4,i__); + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + + v3 * h__[j + (k + 2) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + h__[j + (k + 2) * h_dim1] -= sum * t3; +/* L70: */ } - temp = *scale / smini; - x[x_dim1 + 1] = temp * b[b_dim1 + 1]; - x[x_dim1 + 2] = temp * b[b_dim1 + 2]; - x[((x_dim1) << (1)) + 1] = temp * b[((b_dim1) << (1)) + 1]; - x[((x_dim1) << (1)) + 2] = temp * b[((b_dim1) << (1)) + 2]; - *xnorm = temp * bnorm; - *info = 1; - return 0; - } -/* Gaussian elimination with complete pivoting. 
*/ - - ur11 = crv[icmax - 1]; - ui11 = civ[icmax - 1]; - cr21 = crv[ipivot[((icmax) << (2)) - 3] - 1]; - ci21 = civ[ipivot[((icmax) << (2)) - 3] - 1]; - ur12 = crv[ipivot[((icmax) << (2)) - 2] - 1]; - ui12 = civ[ipivot[((icmax) << (2)) - 2] - 1]; - cr22 = crv[ipivot[((icmax) << (2)) - 1] - 1]; - ci22 = civ[ipivot[((icmax) << (2)) - 1] - 1]; - if ((icmax == 1) || (icmax == 4)) { + if (*wantz) { -/* Code when off-diagonals of pivoted C are real */ +/* Accumulate transformations in the matrix Z */ - if (dabs(ur11) > dabs(ui11)) { - temp = ui11 / ur11; -/* Computing 2nd power */ - r__1 = temp; - ur11r = 1.f / (ur11 * (r__1 * r__1 + 1.f)); - ui11r = -temp * ur11r; - } else { - temp = ur11 / ui11; -/* Computing 2nd power */ - r__1 = temp; - ui11r = -1.f / (ui11 * (r__1 * r__1 + 1.f)); - ur11r = -temp * ui11r; + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + z__[j + (k + 2) * z_dim1] -= sum * t3; +/* L80: */ + } } - lr21 = cr21 * ur11r; - li21 = cr21 * ui11r; - ur12s = ur12 * ur11r; - ui12s = ur12 * ui11r; - ur22 = cr22 - ur12 * lr21; - ui22 = ci22 - ur12 * li21; - } else { + } else if (nr == 2) { -/* Code when diagonals of pivoted C are real */ +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. 
+*/ - ur11r = 1.f / ur11; - ui11r = 0.f; - lr21 = cr21 * ur11r; - li21 = ci21 * ur11r; - ur12s = ur12 * ur11r; - ui12s = ui12 * ur11r; - ur22 = cr22 - ur12 * lr21 + ui12 * li21; - ui22 = -ur12 * li21 - ui12 * lr21; - } - u22abs = dabs(ur22) + dabs(ui22); + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; +/* L90: */ + } -/* If smaller pivot < SMINI, use SMINI */ +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). +*/ - if (u22abs < smini) { - ur22 = smini; - ui22 = 0.f; - *info = 1; - } - if (rswap[icmax - 1]) { - br2 = b[b_dim1 + 1]; - br1 = b[b_dim1 + 2]; - bi2 = b[((b_dim1) << (1)) + 1]; - bi1 = b[((b_dim1) << (1)) + 2]; - } else { - br1 = b[b_dim1 + 1]; - br2 = b[b_dim1 + 2]; - bi1 = b[((b_dim1) << (1)) + 1]; - bi2 = b[((b_dim1) << (1)) + 2]; - } - br2 = br2 - lr21 * br1 + li21 * bi1; - bi2 = bi2 - li21 * br1 - lr21 * bi1; -/* Computing MAX */ - r__1 = (dabs(br1) + dabs(bi1)) * (u22abs * (dabs(ur11r) + dabs( - ui11r))), r__2 = dabs(br2) + dabs(bi2); - bbnd = dmax(r__1,r__2); - if (bbnd > 1.f && u22abs < 1.f) { - if (bbnd >= bignum * u22abs) { - *scale = 1.f / bbnd; - br1 = *scale * br1; - bi1 = *scale * bi1; - br2 = *scale * br2; - bi2 = *scale * bi2; + i__3 = i__; + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + ; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; +/* L100: */ } - } - sladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); - xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; - xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; - if (cswap[icmax - 1]) { - x[x_dim1 + 1] = xr2; - x[x_dim1 + 2] = xr1; - x[((x_dim1) << (1)) + 1] = xi2; - x[((x_dim1) << (1)) + 2] = xi1; - } else { - x[x_dim1 + 1] = xr1; - x[x_dim1 + 2] = xr2; - x[((x_dim1) << (1)) + 1] = xi1; - x[((x_dim1) << (1)) + 2] = xi2; - } 
-/* Computing MAX */ - r__1 = dabs(xr1) + dabs(xi1), r__2 = dabs(xr2) + dabs(xi2); - *xnorm = dmax(r__1,r__2); + if (*wantz) { -/* Further scaling if norm(A) norm(X) > overflow */ +/* Accumulate transformations in the matrix Z */ - if (*xnorm > 1.f && cmax > 1.f) { - if (*xnorm > bignum / cmax) { - temp = cmax / bignum; - x[x_dim1 + 1] = temp * x[x_dim1 + 1]; - x[x_dim1 + 2] = temp * x[x_dim1 + 2]; - x[((x_dim1) << (1)) + 1] = temp * x[((x_dim1) << (1)) + 1] - ; - x[((x_dim1) << (1)) + 2] = temp * x[((x_dim1) << (1)) + 2] - ; - *xnorm = temp * *xnorm; - *scale = temp * *scale; + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; +/* L110: */ + } } } +/* L120: */ } - } - return 0; - -/* End of SLALN2 */ +/* L130: */ + } -} /* slaln2_ */ +/* Failure to converge in remaining number of iterations */ -#undef crv -#undef civ -#undef cr -#undef ci + *info = i__; + return 0; +L140: -/* Subroutine */ int slals0_(integer *icompq, integer *nl, integer *nr, - integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, - integer *ldbx, integer *perm, integer *givptr, integer *givcol, - integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * - difl, real *difr, real *z__, integer *k, real *c__, real *s, real * - work, integer *info) -{ - /* System generated locals */ - integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, - difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, - poles_offset, i__1, i__2; - real r__1; + if (l == i__) { - /* Local variables */ - static integer i__, j, m, n; - static real dj; - static integer nlp1; - static real temp; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *); - extern doublereal snrm2_(integer *, real *, integer *); - static real diflj, difrj, dsigj; - extern /* Subroutine */ int sscal_(integer *, real *, 
real *, integer *), - sgemv_(char *, integer *, integer *, real *, real *, integer *, - real *, integer *, real *, real *, integer *), scopy_( - integer *, real *, integer *, real *, integer *); - extern doublereal slamc3_(real *, real *); - extern /* Subroutine */ int xerbla_(char *, integer *); - static real dsigjp; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, - real *, integer *); +/* H(I,I-1) is negligible: one eigenvalue has converged. */ + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } else if (l == i__ - 1) { /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 + H(I-1,I-2) is negligible: a pair of eigenvalues have converged. + Transform the 2-by-2 submatrix to standard Schur form, + and compute and store the eigenvalues. +*/ - Purpose - ======= + dlanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * + h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * + h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, + &sn); - SLALS0 applies back the multiplying factors of either the left or the - right singular vector matrix of a diagonal matrix appended by a row - to the right hand side matrix B in solving the least squares problem - using the divide-and-conquer SVD approach. + if (*wantt) { - For the left singular vector matrix, three types of orthogonal - matrices are involved: +/* Apply the transformation to the rest of H. */ - (1L) Givens rotations: the number of such rotations is GIVPTR; the - pairs of columns/rows they were applied to are stored in GIVCOL; - and the C- and S-values of these rotations are stored in GIVNUM. 
+ if (i2 > i__) { + i__1 = i2 - i__; + drot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ + i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); + } + i__1 = i__ - i1 - 1; + drot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * + h_dim1], &c__1, &cs, &sn); + } + if (*wantz) { - (2L) Permutation. The (NL+1)-st row of B is to be moved to the first - row, and for J=2:N, PERM(J)-th row of B is to be moved to the - J-th row. +/* Apply the transformation to Z. */ - (3L) The left singular vector matrix of the remaining matrix. + drot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + + i__ * z_dim1], &c__1, &cs, &sn); + } + } - For the right singular vector matrix, four types of orthogonal - matrices are involved: +/* + Decrement number of remaining iterations, and return to start of + the main loop with new value of I. +*/ - (1R) The right singular vector matrix of the remaining matrix. + itn -= its; + i__ = l - 1; + goto L10; - (2R) If SQRE = 1, one extra Givens rotation to generate the right - null space. +L150: + return 0; - (3R) The inverse transformation of (2L). +/* End of DLAHQR */ - (4R) The inverse transformation of (1L). +} /* dlahqr_ */ - Arguments - ========= +/* Subroutine */ int dlahrd_(integer *n, integer *k, integer *nb, doublereal * + a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, + doublereal *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + doublereal d__1; - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed in - factored form: - = 0: Left singular vector matrix. - = 1: Right singular vector matrix. 
+ /* Local variables */ + static integer i__; + static doublereal ei; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemv_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dcopy_(integer *, doublereal *, + integer *, doublereal *, integer *), daxpy_(integer *, doublereal + *, doublereal *, integer *, doublereal *, integer *), dtrmv_(char + *, char *, char *, integer *, doublereal *, integer *, doublereal + *, integer *), dlarfg_(integer *, + doublereal *, doublereal *, integer *, doublereal *); - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - The bidiagonal matrix has row dimension N = NL + NR + 1, - and column dimension M = N + SQRE. + Purpose + ======= - NRHS (input) INTEGER - The number of columns of B and BX. NRHS must be at least 1. + DLAHRD reduces the first NB columns of a real general n-by-(n-k+1) + matrix A so that elements below the k-th subdiagonal are zero. The + reduction is performed by an orthogonal similarity transformation + Q' * A * Q. The routine returns the matrices V and T which determine + Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. - B (input/output) REAL array, dimension ( LDB, NRHS ) - On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. + This is an auxiliary routine called by DGEHRD. - LDB (input) INTEGER - The leading dimension of B. 
LDB must be at least - max(1,MAX( M, N ) ). + Arguments + ========= - BX (workspace) REAL array, dimension ( LDBX, NRHS ) + N (input) INTEGER + The order of the matrix A. - LDBX (input) INTEGER - The leading dimension of BX. + K (input) INTEGER + The offset for the reduction. Elements below the k-th + subdiagonal in the first NB columns are reduced to zero. - PERM (input) INTEGER array, dimension ( N ) - The permutations (from deflation and sorting) applied - to the two blocks. + NB (input) INTEGER + The number of columns to be reduced. - GIVPTR (input) INTEGER - The number of Givens rotations which took place in this - subproblem. + A (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1) + On entry, the n-by-(n-k+1) general matrix A. + On exit, the elements on and above the k-th subdiagonal in + the first NB columns are overwritten with the corresponding + elements of the reduced matrix; the elements below the k-th + subdiagonal, with the array TAU, represent the matrix Q as a + product of elementary reflectors. The other columns of A are + unchanged. See Further Details. - GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) - Each pair of numbers indicates a pair of rows/columns - involved in a Givens rotation. + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - LDGCOL (input) INTEGER - The leading dimension of GIVCOL, must be at least N. + TAU (output) DOUBLE PRECISION array, dimension (NB) + The scalar factors of the elementary reflectors. See Further + Details. - GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) - Each number indicates the C or S value used in the - corresponding Givens rotation. + T (output) DOUBLE PRECISION array, dimension (LDT,NB) + The upper triangular matrix T. - LDGNUM (input) INTEGER - The leading dimension of arrays DIFR, POLES and - GIVNUM, must be at least K. + LDT (input) INTEGER + The leading dimension of the array T. LDT >= NB. 
- POLES (input) REAL array, dimension ( LDGNUM, 2 ) - On entry, POLES(1:K, 1) contains the new singular - values obtained from solving the secular equation, and - POLES(1:K, 2) is an array containing the poles in the secular - equation. + Y (output) DOUBLE PRECISION array, dimension (LDY,NB) + The n-by-nb matrix Y. - DIFL (input) REAL array, dimension ( K ). - On entry, DIFL(I) is the distance between I-th updated - (undeflated) singular value and the I-th (undeflated) old - singular value. + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= N. - DIFR (input) REAL array, dimension ( LDGNUM, 2 ). - On entry, DIFR(I, 1) contains the distances between I-th - updated (undeflated) singular value and the I+1-th - (undeflated) old singular value. And DIFR(I, 2) is the - normalizing factor for the I-th right singular vector. + Further Details + =============== - Z (input) REAL array, dimension ( K ) - Contain the components of the deflation-adjusted updating row - vector. + The matrix Q is represented as a product of nb elementary reflectors - K (input) INTEGER - Contains the dimension of the non-deflated matrix, - This is the order of the related secular equation. 1 <= K <=N. + Q = H(1) H(2) . . . H(nb). - C (input) REAL - C contains garbage if SQRE =0 and the C-value of a Givens - rotation related to the right null space if SQRE = 1. + Each H(i) has the form - S (input) REAL - S contains garbage if SQRE =0 and the S-value of a Givens - rotation related to the right null space if SQRE = 1. + H(i) = I - tau * v * v' - WORK (workspace) REAL array, dimension ( K ) + where tau is a real scalar, and v is a real vector with + v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in + A(i+k+1:n,i), and tau in TAU(i). - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. 
+ The elements of the vectors v together form the (n-k+1)-by-nb matrix + V which is needed, with T and Y, to apply the transformation to the + unreduced part of the matrix, using an update of the form: + A := (I - V*T*V') * (A - Y*V'). - Further Details - =============== + The contents of A on exit are illustrated by the following example + with n = 7, k = 3 and nb = 2: - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA + ( a h a a a ) + ( a h a a a ) + ( a h a a a ) + ( h h a a a ) + ( v1 h a a a ) + ( v1 v2 a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). ===================================================================== - Test the input parameters. + Quick return if possible */ /* Parameter adjustments */ - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - bx_dim1 = *ldbx; - bx_offset = 1 + bx_dim1; - bx -= bx_offset; - --perm; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - difr_dim1 = *ldgnum; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - poles_dim1 = *ldgnum; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - givnum_dim1 = *ldgnum; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - --difl; - --z__; - --work; + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; /* Function Body */ - *info = 0; - - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*nl < 1) { - *info = -2; - } else if (*nr < 1) { - *info = -3; - } else if ((*sqre < 0) || (*sqre > 1)) { - *info = -4; + if (*n <= 1) { + return 0; } - n = *nl + *nr + 1; + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if 
(i__ > 1) { - if (*nrhs < 1) { - *info = -5; - } else if (*ldb < n) { - *info = -7; - } else if (*ldbx < n) { - *info = -9; - } else if (*givptr < 0) { - *info = -11; - } else if (*ldgcol < n) { - *info = -13; - } else if (*ldgnum < n) { - *info = -15; - } else if (*k < 1) { - *info = -20; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLALS0", &i__1); - return 0; - } +/* + Update A(1:n,i) - m = n + *sqre; - nlp1 = *nl + 1; + Compute i-th column of A - Y * V' +*/ - if (*icompq == 0) { + i__2 = i__ - 1; + dgemv_("No transpose", n, &i__2, &c_b151, &y[y_offset], ldy, &a[* + k + i__ - 1 + a_dim1], lda, &c_b15, &a[i__ * a_dim1 + 1], + &c__1); /* - Apply back orthogonal transformations from the left. + Apply I - V * T' * V' to this column (call it b) from the + left, using the last column of T as workspace - Step (1L): apply back the Givens rotations performed. + Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) + ( V2 ) ( b2 ) + + where V1 is unit lower triangular + + w := V1' * b1 */ - i__1 = *givptr; - for (i__ = 1; i__ <= i__1; ++i__) { - srot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1], - ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[ - i__ + ((givnum_dim1) << (1))], &givnum[i__ + givnum_dim1]) - ; -/* L10: */ - } + i__2 = i__ - 1; + dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + dtrmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1); -/* Step (2L): permute rows of B. 
*/ +/* w := w + V2'*b2 */ - scopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - scopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], - ldbx); -/* L20: */ - } + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b15, &t[*nb * + t_dim1 + 1], &c__1); -/* - Step (3L): apply the inverse of the left singular vector - matrix to BX. -*/ +/* w := T'*w */ - if (*k == 1) { - scopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); - if (z__[1] < 0.f) { - sscal_(nrhs, &c_b1290, &b[b_offset], ldb); - } - } else { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - diflj = difl[j]; - dj = poles[j + poles_dim1]; - dsigj = -poles[j + ((poles_dim1) << (1))]; - if (j < *k) { - difrj = -difr[j + difr_dim1]; - dsigjp = -poles[j + 1 + ((poles_dim1) << (1))]; - } - if ((z__[j] == 0.f) || (poles[j + ((poles_dim1) << (1))] == - 0.f)) { - work[j] = 0.f; - } else { - work[j] = -poles[j + ((poles_dim1) << (1))] * z__[j] / - diflj / (poles[j + ((poles_dim1) << (1))] + dj); - } - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - if ((z__[i__] == 0.f) || (poles[i__ + ((poles_dim1) << (1) - )] == 0.f)) { - work[i__] = 0.f; - } else { - work[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[ - i__] / (slamc3_(&poles[i__ + ((poles_dim1) << - (1))], &dsigj) - diflj) / (poles[i__ + (( - poles_dim1) << (1))] + dj); - } -/* L30: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - if ((z__[i__] == 0.f) || (poles[i__ + ((poles_dim1) << (1) - )] == 0.f)) { - work[i__] = 0.f; - } else { - work[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[ - i__] / (slamc3_(&poles[i__ + ((poles_dim1) << - (1))], &dsigjp) + difrj) / (poles[i__ + (( - poles_dim1) << (1))] + dj); - } -/* L40: */ - } - work[1] = -1.f; - temp = snrm2_(k, &work[1], &c__1); - sgemv_("T", k, nrhs, &c_b1011, &bx[bx_offset], ldbx, &work[1], - &c__1, &c_b320, &b[j + 
b_dim1], ldb); - slascl_("G", &c__0, &c__0, &temp, &c_b1011, &c__1, nrhs, &b[j - + b_dim1], ldb, info); -/* L50: */ - } - } + i__2 = i__ - 1; + dtrmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[*nb * t_dim1 + 1], &c__1); -/* Move the deflated rows of BX to B also. */ +/* b2 := b2 - V2*w */ - if (*k < max(m,n)) { - i__1 = n - *k; - slacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 - + b_dim1], ldb); - } - } else { + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[*k + i__ + + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b15, &a[*k + + i__ + i__ * a_dim1], &c__1); -/* - Apply back the right orthogonal transformations. +/* b1 := b1 - V1*w */ - Step (1R): apply back the new right singular vector matrix - to B. -*/ + i__2 = i__ - 1; + dtrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] + , lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + daxpy_(&i__2, &c_b151, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + + i__ * a_dim1], &c__1); - if (*k == 1) { - scopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); - } else { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dsigj = poles[j + ((poles_dim1) << (1))]; - if (z__[j] == 0.f) { - work[j] = 0.f; - } else { - work[j] = -z__[j] / difl[j] / (dsigj + poles[j + - poles_dim1]) / difr[j + ((difr_dim1) << (1))]; - } - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - if (z__[j] == 0.f) { - work[i__] = 0.f; - } else { - r__1 = -poles[i__ + 1 + ((poles_dim1) << (1))]; - work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr[ - i__ + difr_dim1]) / (dsigj + poles[i__ + - poles_dim1]) / difr[i__ + ((difr_dim1) << (1)) - ]; - } -/* L60: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - if (z__[j] == 0.f) { - work[i__] = 0.f; - } else { - r__1 = -poles[i__ + ((poles_dim1) << (1))]; - work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[ - i__]) / (dsigj + poles[i__ + poles_dim1]) / - difr[i__ + ((difr_dim1) << (1))]; - } -/* 
L70: */ - } - sgemv_("T", k, nrhs, &c_b1011, &b[b_offset], ldb, &work[1], & - c__1, &c_b320, &bx[j + bx_dim1], ldbx); -/* L80: */ - } + a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; } /* - Step (2R): if SQRE = 1, apply back the rotation that is - related to the right null space of the subproblem. + Generate the elementary reflector H(i) to annihilate + A(k+i+1:n,i) */ - if (*sqre == 1) { - scopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); - srot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, - s); - } - if (*k < max(m,n)) { - i__1 = n - *k; - slacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + - bx_dim1], ldbx); - } + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + ei = a[*k + i__ + i__ * a_dim1]; + a[*k + i__ + i__ * a_dim1] = 1.; -/* Step (3R): permute rows of B. */ +/* Compute Y(1:n,i) */ - scopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); - if (*sqre == 1) { - scopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); - } - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - scopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], - ldb); -/* L90: */ - } + i__2 = *n - *k - i__ + 1; + dgemv_("No transpose", n, &i__2, &c_b15, &a[(i__ + 1) * a_dim1 + 1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &t[i__ * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + dgemv_("No transpose", n, &i__2, &c_b151, &y[y_offset], ldy, &t[i__ * + t_dim1 + 1], &c__1, &c_b15, &y[i__ * y_dim1 + 1], &c__1); + dscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); -/* Step (4R): apply back the Givens rotations performed. 
*/ +/* Compute T(1:i,i) */ - for (i__ = *givptr; i__ >= 1; --i__) { - r__1 = -givnum[i__ + givnum_dim1]; - srot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1], - ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[ - i__ + ((givnum_dim1) << (1))], &r__1); -/* L100: */ - } + i__2 = i__ - 1; + d__1 = -tau[i__]; + dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + t[i__ + i__ * t_dim1] = tau[i__]; + +/* L10: */ } + a[*k + *nb + *nb * a_dim1] = ei; return 0; -/* End of SLALS0 */ +/* End of DLAHRD */ + +} /* dlahrd_ */ + +/* Subroutine */ int dlaln2_(logical *ltrans, integer *na, integer *nw, + doublereal *smin, doublereal *ca, doublereal *a, integer *lda, + doublereal *d1, doublereal *d2, doublereal *b, integer *ldb, + doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, + doublereal *scale, doublereal *xnorm, integer *info) +{ + /* Initialized data */ -} /* slals0_ */ + static logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; + static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; + static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, + 4,3,2,1 }; -/* Subroutine */ int slalsa_(integer *icompq, integer *smlsiz, integer *n, - integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, real * - u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real * - z__, real *poles, integer *givptr, integer *givcol, integer *ldgcol, - integer *perm, real *givnum, real *c__, real *s, real *work, integer * - iwork, integer *info) -{ /* System generated locals */ - integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, - b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, - difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, - u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, - i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer 
*); + integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + static doublereal equiv_0[4], equiv_1[4]; /* Local variables */ - static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, - ndb1, nlp1, lvl2, nrp1, nlvl, sqre, inode, ndiml; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer ndimr; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *), slals0_(integer *, integer *, integer *, integer *, - integer *, real *, integer *, real *, integer *, integer *, - integer *, integer *, integer *, real *, integer *, real *, real * - , real *, real *, integer *, real *, real *, real *, integer *), - xerbla_(char *, integer *), slasdt_(integer *, integer *, - integer *, integer *, integer *, integer *, integer *); + static integer j; +#define ci (equiv_0) +#define cr (equiv_1) + static doublereal bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, + cr21, cr22, li21, csi, ui11, lr21, ui12, ui22; +#define civ (equiv_0) + static doublereal csr, ur11, ur12, ur22; +#define crv (equiv_1) + static doublereal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; + static integer icmax; + static doublereal bnorm, cnorm, smini; + + extern /* Subroutine */ int dladiv_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + static doublereal bignum, smlnum; /* - -- LAPACK routine (version 3.0) -- + -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + October 31, 1992 Purpose ======= - SLALSA is an itermediate step in solving the least squares problem - by computing the SVD of the coefficient matrix in compact form (The - singular vectors are computed as products of simple orthorgonal - matrices.). + DLALN2 solves a system of the form (ca A - w D ) X = s B + or (ca A' - w D) X = s B with possible scaling ("s") and + perturbation of A. (A' means A-transpose.) - If ICOMPQ = 0, SLALSA applies the inverse of the left singular vector - matrix of an upper bidiagonal matrix to the right hand side; and if - ICOMPQ = 1, SLALSA applies the right singular vector matrix to the - right hand side. The singular vector matrices were generated in - compact form by SLALSA. + A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA + real diagonal matrix, w is a real or complex value, and X and B are + NA x 1 matrices -- real if w is real, complex if w is complex. NA + may be 1 or 2. + + If w is complex, X and B are represented as NA x 2 matrices, + the first column of each being the real part and the second + being the imaginary part. + + "s" is a scaling factor (.LE. 1), computed by DLALN2, which is + so chosen that X can be computed without overflow. X is further + scaled if necessary to assure that norm(ca A - w D)*norm(X) is less + than overflow. + + If both singular values of (ca A - w D) are less than SMIN, + SMIN*identity will be used instead of (ca A - w D). If only one + singular value is less than SMIN, one element of (ca A - w D) will be + perturbed enough to make the smallest singular value roughly SMIN. + If both singular values are at least SMIN, (ca A - w D) will not be + perturbed. In any case, the perturbation will be at most some small + multiple of max( SMIN, ulp*norm(ca A - w D) ). 
The singular values + are computed by infinity-norm approximations, and thus will only be + correct to a factor of 2 or so. + + Note: all input quantities are assumed to be smaller than overflow + by a reasonable factor. (See BIGNUM.) Arguments - ========= + ========== + LTRANS (input) LOGICAL + =.TRUE.: A-transpose will be used. + =.FALSE.: A will be used (not transposed.) - ICOMPQ (input) INTEGER - Specifies whether the left or the right singular vector - matrix is involved. - = 0: Left singular vector matrix - = 1: Right singular vector matrix + NA (input) INTEGER + The size of the matrix A. It may (only) be 1 or 2. - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. + NW (input) INTEGER + 1 if "w" is real, 2 if "w" is complex. It may only be 1 + or 2. - N (input) INTEGER - The row and column dimensions of the upper bidiagonal matrix. + SMIN (input) DOUBLE PRECISION + The desired lower bound on the singular values of A. This + should be a safe distance away from underflow or overflow, + say, between (underflow/machine precision) and (machine + precision * overflow ). (See BIGNUM and ULP.) - NRHS (input) INTEGER - The number of columns of B and BX. NRHS must be at least 1. + CA (input) DOUBLE PRECISION + The coefficient c, which A is multiplied by. - B (input) REAL array, dimension ( LDB, NRHS ) - On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. + A (input) DOUBLE PRECISION array, dimension (LDA,NA) + The NA x NA matrix A. - LDB (input) INTEGER - The leading dimension of B in the calling subprogram. - LDB must be at least max(1,MAX( M, N ) ). + LDA (input) INTEGER + The leading dimension of A. It must be at least NA. - BX (output) REAL array, dimension ( LDBX, NRHS ) - On exit, the result of applying the left or right singular - vector matrix to B. 
+ D1 (input) DOUBLE PRECISION + The 1,1 element in the diagonal matrix D. - LDBX (input) INTEGER - The leading dimension of BX. + D2 (input) DOUBLE PRECISION + The 2,2 element in the diagonal matrix D. Not used if NW=1. - U (input) REAL array, dimension ( LDU, SMLSIZ ). - On entry, U contains the left singular vector matrices of all - subproblems at the bottom level. + B (input) DOUBLE PRECISION array, dimension (LDB,NW) + The NA x NW matrix B (right-hand side). If NW=2 ("w" is + complex), column 1 contains the real part of B and column 2 + contains the imaginary part. - LDU (input) INTEGER, LDU = > N. - The leading dimension of arrays U, VT, DIFL, DIFR, - POLES, GIVNUM, and Z. + LDB (input) INTEGER + The leading dimension of B. It must be at least NA. - VT (input) REAL array, dimension ( LDU, SMLSIZ+1 ). - On entry, VT' contains the right singular vector matrices of - all subproblems at the bottom level. + WR (input) DOUBLE PRECISION + The real part of the scalar "w". - K (input) INTEGER array, dimension ( N ). + WI (input) DOUBLE PRECISION + The imaginary part of the scalar "w". Not used if NW=1. - DIFL (input) REAL array, dimension ( LDU, NLVL ). - where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. + X (output) DOUBLE PRECISION array, dimension (LDX,NW) + The NA x NW matrix X (unknowns), as computed by DLALN2. + If NW=2 ("w" is complex), on exit, column 1 will contain + the real part of X and column 2 will contain the imaginary + part. - DIFR (input) REAL array, dimension ( LDU, 2 * NLVL ). - On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record - distances between singular values on the I-th level and - singular values on the (I -1)-th level, and DIFR(*, 2 * I) - record the normalizing factors of the right singular vectors - matrices of subproblems on I-th level. + LDX (input) INTEGER + The leading dimension of X. It must be at least NA. - Z (input) REAL array, dimension ( LDU, NLVL ). 
- On entry, Z(1, I) contains the components of the deflation- - adjusted updating row vector for subproblems on the I-th - level. + SCALE (output) DOUBLE PRECISION + The scale factor that B must be multiplied by to insure + that overflow does not occur when computing X. Thus, + (ca A - w D) X will be SCALE*B, not B (ignoring + perturbations of A.) It will be at most 1. - POLES (input) REAL array, dimension ( LDU, 2 * NLVL ). - On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old - singular values involved in the secular equations on the I-th - level. + XNORM (output) DOUBLE PRECISION + The infinity-norm of X, when X is regarded as an NA x NW + real matrix. - GIVPTR (input) INTEGER array, dimension ( N ). - On entry, GIVPTR( I ) records the number of Givens - rotations performed on the I-th problem on the computation - tree. + INFO (output) INTEGER + An error flag. It will be set to zero if no error occurs, + a negative number if an argument is in error, or a positive + number if ca A - w D had to be perturbed. + The possible values are: + = 0: No error occurred, and (ca A - w D) did not have to be + perturbed. + = 1: (ca A - w D) had to be perturbed to make its smallest + (or only) singular value greater than SMIN. + NOTE: In the interests of speed, this routine does not + check the inputs for errors. - GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). - On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the - locations of Givens rotations performed on the I-th level on - the computation tree. + ===================================================================== +*/ - LDGCOL (input) INTEGER, LDGCOL = > N. - The leading dimension of arrays GIVCOL and PERM. + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; - PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). 
- On entry, PERM(*, I) records permutations done on the I-th - level of the computation tree. + /* Function Body */ - GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ). - On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- - values of Givens rotations performed on the I-th level on the - computation tree. +/* Compute BIGNUM */ - C (input) REAL array, dimension ( N ). - On entry, if the I-th subproblem is not square, - C( I ) contains the C-value of a Givens rotation related to - the right null space of the I-th subproblem. + smlnum = 2. * SAFEMINIMUM; + bignum = 1. / smlnum; + smini = max(*smin,smlnum); - S (input) REAL array, dimension ( N ). - On entry, if the I-th subproblem is not square, - S( I ) contains the S-value of a Givens rotation related to - the right null space of the I-th subproblem. +/* Don't check for input errors */ - WORK (workspace) REAL array. - The dimension must be at least N. + *info = 0; - IWORK (workspace) INTEGER array. - The dimension must be at least 3 * N +/* Standard Initializations */ - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. + *scale = 1.; - Further Details - =============== + if (*na == 1) { - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA +/* 1 x 1 (i.e., scalar) system C X = B */ - ===================================================================== + if (*nw == 1) { +/* + Real 1x1 system. - Test the input parameters. 
+ C = ca A - w D */ - /* Parameter adjustments */ - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - bx_dim1 = *ldbx; - bx_offset = 1 + bx_dim1; - bx -= bx_offset; - givnum_dim1 = *ldu; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - poles_dim1 = *ldu; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - z_dim1 = *ldu; - z_offset = 1 + z_dim1; - z__ -= z_offset; - difr_dim1 = *ldu; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - difl_dim1 = *ldu; - difl_offset = 1 + difl_dim1; - difl -= difl_offset; - vt_dim1 = *ldu; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - --k; - --givptr; - perm_dim1 = *ldgcol; - perm_offset = 1 + perm_dim1; - perm -= perm_offset; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - --c__; - --s; - --work; - --iwork; + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + cnorm = abs(csr); - /* Function Body */ - *info = 0; +/* If | C | < SMINI, use C = SMINI */ - if ((*icompq < 0) || (*icompq > 1)) { - *info = -1; - } else if (*smlsiz < 3) { - *info = -2; - } else if (*n < *smlsiz) { - *info = -3; - } else if (*nrhs < 1) { - *info = -4; - } else if (*ldb < *n) { - *info = -6; - } else if (*ldbx < *n) { - *info = -8; - } else if (*ldu < *n) { - *info = -10; - } else if (*ldgcol < *n) { - *info = -19; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLALSA", &i__1); - return 0; - } + if (cnorm < smini) { + csr = smini; + cnorm = smini; + *info = 1; + } -/* Book-keeping and setting up the computation tree. */ +/* Check scaling for X = B / C */ - inode = 1; - ndiml = inode + *n; - ndimr = ndiml + *n; + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. 
/ bnorm; + } + } - slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], - smlsiz); +/* Compute X */ + + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + } else { /* - The following code applies back the left singular vector factors. - For applying back the right singular vector factors, go to 50. + Complex 1x1 system (w is complex) + + C = ca A - w D */ - if (*icompq == 1) { - goto L50; - } + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + csi = -(*wi) * *d1; + cnorm = abs(csr) + abs(csi); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + csi = 0.; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << + 1) + 1], abs(d__2)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. / bnorm; + } + } + +/* Compute X */ + + d__1 = *scale * b[b_dim1 + 1]; + d__2 = *scale * b[(b_dim1 << 1) + 1]; + dladiv_(&d__1, &d__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) + + 1]); + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << + 1) + 1], abs(d__2)); + } + + } else { /* - The nodes on the bottom level of the tree were solved - by SLASDQ. The corresponding left and right singular vector - matrices are in explicit form. First apply back the left - singular vector matrices. 
+ 2x2 System + + Compute the real part of C = ca A - w D (or ca A' - w D ) */ - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { + cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; + cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; + if (*ltrans) { + cr[2] = *ca * a[a_dim1 + 2]; + cr[1] = *ca * a[(a_dim1 << 1) + 1]; + } else { + cr[1] = *ca * a[a_dim1 + 2]; + cr[2] = *ca * a[(a_dim1 << 1) + 1]; + } + + if (*nw == 1) { /* - IC : center row of each node - NL : number of rows of left subproblem - NR : number of rows of right subproblem - NLF: starting row of the left subproblem - NRF: starting row of the right subproblem + Real 2x2 system (w is real) + + Find the largest element in C */ - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nr = iwork[ndimr + i1]; - nlf = ic - nl; - nrf = ic + 1; - sgemm_("T", "N", &nl, nrhs, &nl, &c_b1011, &u[nlf + u_dim1], ldu, &b[ - nlf + b_dim1], ldb, &c_b320, &bx[nlf + bx_dim1], ldbx); - sgemm_("T", "N", &nr, nrhs, &nr, &c_b1011, &u[nrf + u_dim1], ldu, &b[ - nrf + b_dim1], ldb, &c_b320, &bx[nrf + bx_dim1], ldbx); + cmax = 0.; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)); + icmax = j; + } /* L10: */ - } + } + +/* If norm(C) < SMINI, use SMINI*identity. */ + + if (cmax < smini) { +/* Computing MAX */ + d__3 = (d__1 = b[b_dim1 + 1], abs(d__1)), d__4 = (d__2 = b[ + b_dim1 + 2], abs(d__2)); + bnorm = max(d__3,d__4); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. */ + + ur11 = crv[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ur11r = 1. 
/ ur11; + lr21 = ur11r * cr21; + ur22 = cr22 - ur12 * lr21; + +/* If smaller pivot < SMINI, use SMINI */ + + if (abs(ur22) < smini) { + ur22 = smini; + *info = 1; + } + if (rswap[icmax - 1]) { + br1 = b[b_dim1 + 2]; + br2 = b[b_dim1 + 1]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + } + br2 -= lr21 * br1; +/* Computing MAX */ + d__2 = (d__1 = br1 * (ur22 * ur11r), abs(d__1)), d__3 = abs(br2); + bbnd = max(d__2,d__3); + if (bbnd > 1. && abs(ur22) < 1.) { + if (bbnd >= bignum * abs(ur22)) { + *scale = 1. / bbnd; + } + } + + xr2 = br2 * *scale / ur22; + xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + } +/* Computing MAX */ + d__1 = abs(xr1), d__2 = abs(xr2); + *xnorm = max(d__1,d__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1. && cmax > 1.) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } else { /* - Next copy the rows of B that correspond to unchanged rows - in the bidiagonal matrix to BX. + Complex 2x2 system (w is complex) + + Find the largest element in C */ - i__1 = nd; - for (i__ = 1; i__ <= i__1; ++i__) { - ic = iwork[inode + i__ - 1]; - scopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); + ci[0] = -(*wi) * *d1; + ci[1] = 0.; + ci[2] = 0.; + ci[3] = -(*wi) * *d2; + cmax = 0.; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs( + d__2)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1] + , abs(d__2)); + icmax = j; + } /* L20: */ - } + } -/* - Finally go through the left singular vector matrices of all - the other subproblems bottom-up on the tree. -*/ +/* If norm(C) < SMINI, use SMINI*identity. 
*/ - j = pow_ii(&c__2, &nlvl); - sqre = 0; + if (cmax < smini) { +/* Computing MAX */ + d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 + << 1) + 1], abs(d__2)), d__6 = (d__3 = b[b_dim1 + 2], + abs(d__3)) + (d__4 = b[(b_dim1 << 1) + 2], abs(d__4)); + bnorm = max(d__5,d__6); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } - for (lvl = nlvl; lvl >= 1; --lvl) { - lvl2 = ((lvl) << (1)) - 1; +/* Gaussian elimination with complete pivoting. */ -/* - find the first node LF and last node LL on - the current level LVL -*/ + ur11 = crv[icmax - 1]; + ui11 = civ[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; + if (icmax == 1 || icmax == 4) { - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); - ll = ((lf) << (1)) - 1; - } - i__1 = ll; - for (i__ = lf; i__ <= i__1; ++i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - --j; - slals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & - b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & - givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & - givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * - poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + - lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ - j], &s[j], &work[1], info); -/* L30: */ - } -/* L40: */ - } - goto L90; +/* Code when off-diagonals of pivoted C are real */ 
-/* ICOMPQ = 1: applying back the right singular vector factors. */ + if (abs(ur11) > abs(ui11)) { + temp = ui11 / ur11; +/* Computing 2nd power */ + d__1 = temp; + ur11r = 1. / (ur11 * (d__1 * d__1 + 1.)); + ui11r = -temp * ur11r; + } else { + temp = ur11 / ui11; +/* Computing 2nd power */ + d__1 = temp; + ui11r = -1. / (ui11 * (d__1 * d__1 + 1.)); + ur11r = -temp * ui11r; + } + lr21 = cr21 * ur11r; + li21 = cr21 * ui11r; + ur12s = ur12 * ur11r; + ui12s = ur12 * ui11r; + ur22 = cr22 - ur12 * lr21; + ui22 = ci22 - ur12 * li21; + } else { -L50: +/* Code when diagonals of pivoted C are real */ -/* - First now go through the right singular vector matrices of all - the tree nodes top-down. -*/ + ur11r = 1. / ur11; + ui11r = 0.; + lr21 = cr21 * ur11r; + li21 = ci21 * ur11r; + ur12s = ur12 * ur11r; + ui12s = ui12 * ur11r; + ur22 = cr22 - ur12 * lr21 + ui12 * li21; + ui22 = -ur12 * li21 - ui12 * lr21; + } + u22abs = abs(ur22) + abs(ui22); - j = 0; - i__1 = nlvl; - for (lvl = 1; lvl <= i__1; ++lvl) { - lvl2 = ((lvl) << (1)) - 1; +/* If smaller pivot < SMINI, use SMINI */ -/* - Find the first node LF and last node LL on - the current level LVL. -*/ + if (u22abs < smini) { + ur22 = smini; + ui22 = 0.; + *info = 1; + } + if (rswap[icmax - 1]) { + br2 = b[b_dim1 + 1]; + br1 = b[b_dim1 + 2]; + bi2 = b[(b_dim1 << 1) + 1]; + bi1 = b[(b_dim1 << 1) + 2]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + bi1 = b[(b_dim1 << 1) + 1]; + bi2 = b[(b_dim1 << 1) + 2]; + } + br2 = br2 - lr21 * br1 + li21 * bi1; + bi2 = bi2 - li21 * br1 - lr21 * bi1; +/* Computing MAX */ + d__1 = (abs(br1) + abs(bi1)) * (u22abs * (abs(ur11r) + abs(ui11r)) + ), d__2 = abs(br2) + abs(bi2); + bbnd = max(d__1,d__2); + if (bbnd > 1. && u22abs < 1.) { + if (bbnd >= bignum * u22abs) { + *scale = 1. 
/ bbnd; + br1 = *scale * br1; + bi1 = *scale * bi1; + br2 = *scale * br2; + bi2 = *scale * bi2; + } + } - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__2 = lvl - 1; - lf = pow_ii(&c__2, &i__2); - ll = ((lf) << (1)) - 1; - } - i__2 = lf; - for (i__ = ll; i__ >= i__2; --i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - if (i__ == ll) { - sqre = 0; + dladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); + xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; + xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + x[(x_dim1 << 1) + 1] = xi2; + x[(x_dim1 << 1) + 2] = xi1; } else { - sqre = 1; + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + x[(x_dim1 << 1) + 1] = xi1; + x[(x_dim1 << 1) + 2] = xi2; } - ++j; - slals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ - nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & - givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & - givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * - poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + - lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ - j], &s[j], &work[1], info); -/* L60: */ - } -/* L70: */ - } +/* Computing MAX */ + d__1 = abs(xr1) + abs(xi1), d__2 = abs(xr2) + abs(xi2); + *xnorm = max(d__1,d__2); -/* - The nodes on the bottom level of the tree were solved - by SLASDQ. The corresponding right singular vector - matrices are in explicit form. Apply them back. -*/ +/* Further scaling if norm(A) norm(X) > overflow */ - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nr = iwork[ndimr + i1]; - nlp1 = nl + 1; - if (i__ == nd) { - nrp1 = nr; - } else { - nrp1 = nr + 1; + if (*xnorm > 1. && cmax > 1.) 
{ + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } } - nlf = ic - nl; - nrf = ic + 1; - sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1011, &vt[nlf + vt_dim1], - ldu, &b[nlf + b_dim1], ldb, &c_b320, &bx[nlf + bx_dim1], ldbx); - sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1011, &vt[nrf + vt_dim1], - ldu, &b[nrf + b_dim1], ldb, &c_b320, &bx[nrf + bx_dim1], ldbx); -/* L80: */ } -L90: - return 0; -/* End of SLALSA */ +/* End of DLALN2 */ + +} /* dlaln2_ */ + +#undef crv +#undef civ +#undef cr +#undef ci -} /* slalsa_ */ -/* Subroutine */ int slalsd_(char *uplo, integer *smlsiz, integer *n, integer - *nrhs, real *d__, real *e, real *b, integer *ldb, real *rcond, - integer *rank, real *work, integer *iwork, integer *info) +/* Subroutine */ int dlals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal + *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * + poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * + k, doublereal *c__, doublereal *s, doublereal *work, integer *info) { /* System generated locals */ - integer b_dim1, b_offset, i__1, i__2; - real r__1; - - /* Builtin functions */ - double log(doublereal), r_sign(real *, real *); + integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, + difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, + poles_offset, i__1, i__2; + doublereal d__1; /* Local variables */ - static integer c__, i__, j, k; - static real r__; - static integer s, u, z__; - static real cs; - static integer bx; - static real sn; - static integer st, vt, nm1, st1; - static real eps; - static integer iwk; - static 
real tol; - static integer difl, difr, perm, nsub, nlvl, sqre, bxst; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *), sgemm_(char *, char *, integer *, - integer *, integer *, real *, real *, integer *, real *, integer * - , real *, real *, integer *); - static integer poles, sizei, nsize; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - static integer nwork, icmpq1, icmpq2; - extern doublereal slamch_(char *); - extern /* Subroutine */ int slasda_(integer *, integer *, integer *, - integer *, real *, real *, real *, integer *, real *, integer *, - real *, real *, real *, real *, integer *, integer *, integer *, - integer *, real *, real *, real *, real *, integer *, integer *), - xerbla_(char *, integer *), slalsa_(integer *, integer *, - integer *, integer *, real *, integer *, real *, integer *, real * - , integer *, real *, integer *, real *, real *, real *, real *, - integer *, integer *, integer *, integer *, real *, real *, real * - , real *, integer *, integer *), slascl_(char *, integer *, - integer *, real *, real *, integer *, integer *, real *, integer * - , integer *); - static integer givcol; - extern integer isamax_(integer *, real *, integer *); - extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer - *, integer *, integer *, real *, real *, real *, integer *, real * - , integer *, real *, integer *, real *, integer *), - slacpy_(char *, integer *, integer *, real *, integer *, real *, - integer *), slartg_(real *, real *, real *, real *, real * - ), slaset_(char *, integer *, integer *, real *, real *, real *, + static integer i__, j, m, n; + static doublereal dj; + static integer nlp1; + static doublereal temp; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int 
dscal_(integer *, doublereal *, doublereal *, integer *); - static real orgnrm; - static integer givnum; - extern doublereal slanst_(char *, integer *, real *, real *); - extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); - static integer givptr, smlszp; + static doublereal diflj, difrj, dsigj; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + xerbla_(char *, integer *); + static doublereal dsigjp; /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + December 1, 1999 Purpose ======= - SLALSD uses the singular value decomposition of A to solve the least - squares problem of finding X to minimize the Euclidean norm of each - column of A*X-B, where A is N-by-N upper bidiagonal, and X and B - are N-by-NRHS. The solution X overwrites B. + DLALS0 applies back the multiplying factors of either the left or the + right singular vector matrix of a diagonal matrix appended by a row + to the right hand side matrix B in solving the least squares problem + using the divide-and-conquer SVD approach. - The singular values of A smaller than RCOND times the largest - singular value are treated as zero in solving the least squares - problem; in this case a minimum norm solution is returned. - The actual singular values are returned in D in ascending order. 
+ For the left singular vector matrix, three types of orthogonal + matrices are involved: - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. + (1L) Givens rotations: the number of such rotations is GIVPTR; the + pairs of columns/rows they were applied to are stored in GIVCOL; + and the C- and S-values of these rotations are stored in GIVNUM. + + (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + row, and for J=2:N, PERM(J)-th row of B is to be moved to the + J-th row. + + (3L) The left singular vector matrix of the remaining matrix. + + For the right singular vector matrix, four types of orthogonal + matrices are involved: + + (1R) The right singular vector matrix of the remaining matrix. + + (2R) If SQRE = 1, one extra Givens rotation to generate the right + null space. + + (3R) The inverse transformation of (2L). + + (4R) The inverse transformation of (1L). Arguments ========= - UPLO (input) CHARACTER*1 - = 'U': D and E define an upper bidiagonal matrix. - = 'L': D and E define a lower bidiagonal matrix. + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form: + = 0: Left singular vector matrix. + = 1: Right singular vector matrix. - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. - N (input) INTEGER - The dimension of the bidiagonal matrix. N >= 0. + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. - NRHS (input) INTEGER - The number of columns of B. NRHS must be at least 1. + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. 
+ = 1: the lower block is an NR-by-(NR+1) rectangular matrix. - D (input/output) REAL array, dimension (N) - On entry D contains the main diagonal of the bidiagonal - matrix. On exit, if INFO = 0, D contains its singular values. + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. - E (input) REAL array, dimension (N-1) - Contains the super-diagonal entries of the bidiagonal matrix. - On exit, E has been destroyed. + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. - B (input/output) REAL array, dimension (LDB,NRHS) + B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least - squares problem. On output, B contains the solution X. + squares problem in rows 1 through M. On output, B contains + the solution X in rows 1 through N. LDB (input) INTEGER - The leading dimension of B in the calling subprogram. - LDB must be at least max(1,N). + The leading dimension of B. LDB must be at least + max(1,MAX( M, N ) ). - RCOND (input) REAL - The singular values of A less than or equal to RCOND times - the largest singular value are treated as zero in solving - the least squares problem. If RCOND is negative, - machine precision is used instead. - For example, if diag(S)*X=B were the least squares problem, - where diag(S) is a diagonal matrix of singular values, the - solution would be X(i) = B(i) / S(i) if S(i) is greater than - RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to - RCOND*max(S). + BX (workspace) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) - RANK (output) INTEGER - The number of singular values of A greater than RCOND times - the largest singular value. + LDBX (input) INTEGER + The leading dimension of BX. - WORK (workspace) REAL array, dimension at least - (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), - where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). 
+ PERM (input) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) applied + to the two blocks. - IWORK (workspace) INTEGER array, dimension at least - (3*N*NLVL + 11*N) + GIVPTR (input) INTEGER + The number of Givens rotations which took place in this + subproblem. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of rows/columns + involved in a Givens rotation. + + LDGCOL (input) INTEGER + The leading dimension of GIVCOL, must be at least N. + + GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value used in the + corresponding Givens rotation. + + LDGNUM (input) INTEGER + The leading dimension of arrays DIFR, POLES and + GIVNUM, must be at least K. + + POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + On entry, POLES(1:K, 1) contains the new singular + values obtained from solving the secular equation, and + POLES(1:K, 2) is an array containing the poles in the secular + equation. + + DIFL (input) DOUBLE PRECISION array, dimension ( K ). + On entry, DIFL(I) is the distance between I-th updated + (undeflated) singular value and the I-th (undeflated) old + singular value. + + DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). + On entry, DIFR(I, 1) contains the distances between I-th + updated (undeflated) singular value and the I+1-th + (undeflated) old singular value. And DIFR(I, 2) is the + normalizing factor for the I-th right singular vector. + + Z (input) DOUBLE PRECISION array, dimension ( K ) + Contain the components of the deflation-adjusted updating row + vector. + + K (input) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + C (input) DOUBLE PRECISION + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. 
+ + S (input) DOUBLE PRECISION + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + WORK (workspace) DOUBLE PRECISION array, dimension ( K ) INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value while - working on the submatrix lying in rows and columns - INFO/(N+1) through MOD(INFO,N+1). + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== @@ -79237,1317 +14612,1211 @@ L90: */ /* Parameter adjustments */ - --d__; - --e; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; - --work; - --iwork; - - /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -3; - } else if (*nrhs < 1) { - *info = -4; - } else if ((*ldb < 1) || (*ldb < *n)) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLALSD", &i__1); - return 0; - } - - eps = slamch_("Epsilon"); - -/* Set up the tolerance. */ - - if ((*rcond <= 0.f) || (*rcond >= 1.f)) { - *rcond = eps; - } - - *rank = 0; - -/* Quick return if possible. */ - - if (*n == 0) { - return 0; - } else if (*n == 1) { - if (d__[1] == 0.f) { - slaset_("A", &c__1, nrhs, &c_b320, &c_b320, &b[b_offset], ldb); - } else { - *rank = 1; - slascl_("G", &c__0, &c__0, &d__[1], &c_b1011, &c__1, nrhs, &b[ - b_offset], ldb, info); - d__[1] = dabs(d__[1]); - } - return 0; - } - -/* Rotate the matrix if it is lower bidiagonal. 
*/ - - if (*(unsigned char *)uplo == 'L') { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (*nrhs == 1) { - srot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & - c__1, &cs, &sn); - } else { - work[((i__) << (1)) - 1] = cs; - work[i__ * 2] = sn; - } -/* L10: */ - } - if (*nrhs > 1) { - i__1 = *nrhs; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *n - 1; - for (j = 1; j <= i__2; ++j) { - cs = work[((j) << (1)) - 1]; - sn = work[j * 2]; - srot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ * - b_dim1], &c__1, &cs, &sn); -/* L20: */ - } -/* L30: */ - } - } - } - -/* Scale. */ - - nm1 = *n - 1; - orgnrm = slanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.f) { - slaset_("A", n, nrhs, &c_b320, &c_b320, &b[b_offset], ldb); - return 0; - } - - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, &c__1, &d__[1], n, info); - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &nm1, &c__1, &e[1], &nm1, - info); - -/* - If N is smaller than the minimum divide size SMLSIZ, then solve - the problem with another solver. 
-*/ - - if (*n <= *smlsiz) { - nwork = *n * *n + 1; - slaset_("A", n, n, &c_b320, &c_b1011, &work[1], n); - slasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, & - work[1], n, &b[b_offset], ldb, &work[nwork], info); - if (*info != 0) { - return 0; - } - tol = *rcond * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1)); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (d__[i__] <= tol) { - slaset_("A", &c__1, nrhs, &c_b320, &c_b320, &b[i__ + b_dim1], - ldb); - } else { - slascl_("G", &c__0, &c__0, &d__[i__], &c_b1011, &c__1, nrhs, & - b[i__ + b_dim1], ldb, info); - ++(*rank); - } -/* L40: */ - } - sgemm_("T", "N", n, nrhs, n, &c_b1011, &work[1], n, &b[b_offset], ldb, - &c_b320, &work[nwork], n); - slacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb); + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + difr_dim1 = *ldgnum; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --z__; + --work; -/* Unscale. 
*/ + /* Function Body */ + *info = 0; - slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, n, &c__1, &d__[1], n, - info); - slasrt_("D", n, &d__[1], info); - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, nrhs, &b[b_offset], - ldb, info); + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } + + n = *nl + *nr + 1; + if (*nrhs < 1) { + *info = -5; + } else if (*ldb < n) { + *info = -7; + } else if (*ldbx < n) { + *info = -9; + } else if (*givptr < 0) { + *info = -11; + } else if (*ldgcol < n) { + *info = -13; + } else if (*ldgnum < n) { + *info = -15; + } else if (*k < 1) { + *info = -20; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLALS0", &i__1); return 0; } -/* Book-keeping and setting up some constants. */ + m = n + *sqre; + nlp1 = *nl + 1; - nlvl = (integer) (log((real) (*n) / (real) (*smlsiz + 1)) / log(2.f)) + 1; + if (*icompq == 0) { - smlszp = *smlsiz + 1; +/* + Apply back orthogonal transformations from the left. - u = 1; - vt = *smlsiz * *n + 1; - difl = vt + smlszp * *n; - difr = difl + nlvl * *n; - z__ = difr + ((nlvl * *n) << (1)); - c__ = z__ + nlvl * *n; - s = c__ + *n; - poles = s + *n; - givnum = poles + ((nlvl) << (1)) * *n; - bx = givnum + ((nlvl) << (1)) * *n; - nwork = bx + *n * *nrhs; + Step (1L): apply back the Givens rotations performed. +*/ - sizei = *n + 1; - k = sizei + *n; - givptr = k + *n; - perm = givptr + *n; - givcol = perm + nlvl * *n; - iwk = givcol + ((nlvl * *n) << (1)); + i__1 = *givptr; + for (i__ = 1; i__ <= i__1; ++i__) { + drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); +/* L10: */ + } - st = 1; - sqre = 0; - icmpq1 = 1; - icmpq2 = 0; - nsub = 0; +/* Step (2L): permute rows of B. 
*/ - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((r__1 = d__[i__], dabs(r__1)) < eps) { - d__[i__] = r_sign(&eps, &d__[i__]); + dcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], + ldbx); +/* L20: */ } -/* L50: */ - } - - i__1 = nm1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (((r__1 = e[i__], dabs(r__1)) < eps) || (i__ == nm1)) { - ++nsub; - iwork[nsub] = st; /* - Subproblem found. First determine its size and then - apply divide and conquer on it. + Step (3L): apply the inverse of the left singular vector + matrix to BX. */ - if (i__ < nm1) { - -/* A subproblem with E(I) small for I < NM1. */ - - nsize = i__ - st + 1; - iwork[sizei + nsub - 1] = nsize; - } else if ((r__1 = e[i__], dabs(r__1)) >= eps) { + if (*k == 1) { + dcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); + if (z__[1] < 0.) { + dscal_(nrhs, &c_b151, &b[b_offset], ldb); + } + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = poles[j + poles_dim1]; + dsigj = -poles[j + (poles_dim1 << 1)]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; + } + if (z__[j] == 0. || poles[j + (poles_dim1 << 1)] == 0.) { + work[j] = 0.; + } else { + work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj / + (poles[j + (poles_dim1 << 1)] + dj); + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + work[i__] = 0.; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigj) - diflj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L30: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) 
{ + work[i__] = 0.; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigjp) + difrj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L40: */ + } + work[1] = -1.; + temp = dnrm2_(k, &work[1], &c__1); + dgemv_("T", k, nrhs, &c_b15, &bx[bx_offset], ldbx, &work[1], & + c__1, &c_b29, &b[j + b_dim1], ldb); + dlascl_("G", &c__0, &c__0, &temp, &c_b15, &c__1, nrhs, &b[j + + b_dim1], ldb, info); +/* L50: */ + } + } -/* A subproblem with E(NM1) not too small but I = NM1. */ +/* Move the deflated rows of BX to B also. */ - nsize = *n - st + 1; - iwork[sizei + nsub - 1] = nsize; - } else { + if (*k < max(m,n)) { + i__1 = n - *k; + dlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 + + b_dim1], ldb); + } + } else { /* - A subproblem with E(NM1) small. This implies an - 1-by-1 subproblem at D(N), which is not solved - explicitly. -*/ - - nsize = i__ - st + 1; - iwork[sizei + nsub - 1] = nsize; - ++nsub; - iwork[nsub] = *n; - iwork[sizei + nsub - 1] = 1; - scopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); - } - st1 = st - 1; - if (nsize == 1) { + Apply back the right orthogonal transformations. -/* - This is a 1-by-1 subproblem and is not solved - explicitly. + Step (1R): apply back the new right singular vector matrix + to B. */ - scopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); - } else if (nsize <= *smlsiz) { - -/* This is a small subproblem and is solved by SLASDQ. */ - - slaset_("A", &nsize, &nsize, &c_b320, &c_b1011, &work[vt + - st1], n); - slasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[ - st], &work[vt + st1], n, &work[nwork], n, &b[st + - b_dim1], ldb, &work[nwork], info); - if (*info != 0) { - return 0; + if (*k == 1) { + dcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dsigj = poles[j + (poles_dim1 << 1)]; + if (z__[j] == 0.) 
{ + work[j] = 0.; + } else { + work[j] = -z__[j] / difl[j] / (dsigj + poles[j + + poles_dim1]) / difr[j + (difr_dim1 << 1)]; } - slacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + - st1], n); - } else { - -/* A large problem. Solve it using divide and conquer. */ - - slasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & - work[u + st1], n, &work[vt + st1], &iwork[k + st1], & - work[difl + st1], &work[difr + st1], &work[z__ + st1], - &work[poles + st1], &iwork[givptr + st1], &iwork[ - givcol + st1], n, &iwork[perm + st1], &work[givnum + - st1], &work[c__ + st1], &work[s + st1], &work[nwork], - &iwork[iwk], info); - if (*info != 0) { - return 0; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + work[i__] = 0.; + } else { + d__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; + work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[ + i__ + difr_dim1]) / (dsigj + poles[i__ + + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; + } +/* L60: */ } - bxst = bx + st1; - slalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & - work[bxst], n, &work[u + st1], n, &work[vt + st1], & - iwork[k + st1], &work[difl + st1], &work[difr + st1], - &work[z__ + st1], &work[poles + st1], &iwork[givptr + - st1], &iwork[givcol + st1], n, &iwork[perm + st1], & - work[givnum + st1], &work[c__ + st1], &work[s + st1], - &work[nwork], &iwork[iwk], info); - if (*info != 0) { - return 0; + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + work[i__] = 0.; + } else { + d__1 = -poles[i__ + (poles_dim1 << 1)]; + work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[ + i__]) / (dsigj + poles[i__ + poles_dim1]) / + difr[i__ + (difr_dim1 << 1)]; + } +/* L70: */ } + dgemv_("T", k, nrhs, &c_b15, &b[b_offset], ldb, &work[1], & + c__1, &c_b29, &bx[j + bx_dim1], ldbx); +/* L80: */ } - st = i__ + 1; } -/* L60: */ - } - -/* Apply the singular values and treat the tiny ones as zero. 
*/ - - tol = *rcond * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1)); - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { /* - Some of the elements in D can be negative because 1-by-1 - subproblems were not solved explicitly. + Step (2R): if SQRE = 1, apply back the rotation that is + related to the right null space of the subproblem. */ - if ((r__1 = d__[i__], dabs(r__1)) <= tol) { - slaset_("A", &c__1, nrhs, &c_b320, &c_b320, &work[bx + i__ - 1], - n); - } else { - ++(*rank); - slascl_("G", &c__0, &c__0, &d__[i__], &c_b1011, &c__1, nrhs, & - work[bx + i__ - 1], n, info); + if (*sqre == 1) { + dcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); + drot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, + s); + } + if (*k < max(m,n)) { + i__1 = n - *k; + dlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + + bx_dim1], ldbx); } - d__[i__] = (r__1 = d__[i__], dabs(r__1)); -/* L70: */ - } -/* Now apply back the right singular vectors. */ +/* Step (3R): permute rows of B. 
*/ - icmpq2 = 1; - i__1 = nsub; - for (i__ = 1; i__ <= i__1; ++i__) { - st = iwork[i__]; - st1 = st - 1; - nsize = iwork[sizei + i__ - 1]; - bxst = bx + st1; - if (nsize == 1) { - scopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); - } else if (nsize <= *smlsiz) { - sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1011, &work[vt + st1], - n, &work[bxst], n, &c_b320, &b[st + b_dim1], ldb); - } else { - slalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + - b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[ - k + st1], &work[difl + st1], &work[difr + st1], &work[z__ - + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ - givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], - &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[ - iwk], info); - if (*info != 0) { - return 0; - } + dcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); + if (*sqre == 1) { + dcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); + } + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], + ldb); +/* L90: */ } -/* L80: */ - } -/* Unscale and sort the singular values. */ +/* Step (4R): apply back the Givens rotations performed. 
*/ - slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, n, &c__1, &d__[1], n, info); - slasrt_("D", n, &d__[1], info); - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, nrhs, &b[b_offset], ldb, - info); + for (i__ = *givptr; i__ >= 1; --i__) { + d__1 = -givnum[i__ + givnum_dim1]; + drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &d__1); +/* L100: */ + } + } return 0; -/* End of SLALSD */ +/* End of DLALS0 */ -} /* slalsd_ */ +} /* dlals0_ */ -doublereal slamch_(char *cmach) +/* Subroutine */ int dlalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * + ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, + doublereal *difl, doublereal *difr, doublereal *z__, doublereal * + poles, integer *givptr, integer *givcol, integer *ldgcol, integer * + perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * + work, integer *iwork, integer *info) { - /* Initialized data */ - - static logical first = TRUE_; - /* System generated locals */ - integer i__1; - real ret_val; + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, + b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, + difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, + u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, + i__2; /* Builtin functions */ - double pow_ri(real *, integer *); + integer pow_ii(integer *, integer *); /* Local variables */ - static real t; - static integer it; - static real rnd, eps, base; - static integer beta; - static real emin, prec, emax; - static integer imin, imax; - static logical lrnd; - static real rmin, rmax, rmach; - extern logical lsame_(char *, char *); - static real small, sfmin; - extern /* Subroutine */ int slamc2_(integer *, integer *, logical *, real - *, integer *, real *, integer *, real *); + static integer i__, j, i1, ic, lf, 
nd, ll, nl, nr, im1, nlf, nrf, lvl, + ndb1, nlp1, lvl2, nrp1, nlvl, sqre; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer inode, ndiml, ndimr; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlals0_(integer *, integer *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *), dlasdt_(integer *, integer *, integer *, integer *, + integer *, integer *, integer *), xerbla_(char *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + June 30, 1999 Purpose ======= - SLAMCH determines single precision machine parameters. + DLALSA is an itermediate step in solving the least squares problem + by computing the SVD of the coefficient matrix in compact form (The + singular vectors are computed as products of simple orthorgonal + matrices.). + + If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector + matrix of an upper bidiagonal matrix to the right hand side; and if + ICOMPQ = 1, DLALSA applies the right singular vector matrix to the + right hand side. The singular vector matrices were generated in + compact form by DLALSA. 
Arguments ========= - CMACH (input) CHARACTER*1 - Specifies the value to be returned by SLAMCH: - = 'E' or 'e', SLAMCH := eps - = 'S' or 's , SLAMCH := sfmin - = 'B' or 'b', SLAMCH := base - = 'P' or 'p', SLAMCH := eps*base - = 'N' or 'n', SLAMCH := t - = 'R' or 'r', SLAMCH := rnd - = 'M' or 'm', SLAMCH := emin - = 'U' or 'u', SLAMCH := rmin - = 'L' or 'l', SLAMCH := emax - = 'O' or 'o', SLAMCH := rmax - - where - - eps = relative machine precision - sfmin = safe minimum, such that 1/sfmin does not overflow - base = base of the machine - prec = eps*base - t = number of (base) digits in the mantissa - rnd = 1.0 when rounding occurs in addition, 0.0 otherwise - emin = minimum exponent before (gradual) underflow - rmin = underflow threshold - base**(emin-1) - emax = largest exponent before overflow - rmax = overflow threshold - (base**emax)*(1-eps) - ===================================================================== -*/ + ICOMPQ (input) INTEGER + Specifies whether the left or the right singular vector + matrix is involved. + = 0: Left singular vector matrix + = 1: Right singular vector matrix + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. - if (first) { - first = FALSE_; - slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); - base = (real) beta; - t = (real) it; - if (lrnd) { - rnd = 1.f; - i__1 = 1 - it; - eps = pow_ri(&base, &i__1) / 2; - } else { - rnd = 0.f; - i__1 = 1 - it; - eps = pow_ri(&base, &i__1); - } - prec = eps * base; - emin = (real) imin; - emax = (real) imax; - sfmin = rmin; - small = 1.f / rmax; - if (small >= sfmin) { + N (input) INTEGER + The row and column dimensions of the upper bidiagonal matrix. -/* - Use SMALL plus a bit, to avoid the possibility of rounding - causing overflow when computing 1/sfmin. -*/ + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. 
- sfmin = small * (eps + 1.f); - } - } + B (input) DOUBLE PRECISION array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. On output, B contains + the solution X in rows 1 through N. - if (lsame_(cmach, "E")) { - rmach = eps; - } else if (lsame_(cmach, "S")) { - rmach = sfmin; - } else if (lsame_(cmach, "B")) { - rmach = base; - } else if (lsame_(cmach, "P")) { - rmach = prec; - } else if (lsame_(cmach, "N")) { - rmach = t; - } else if (lsame_(cmach, "R")) { - rmach = rnd; - } else if (lsame_(cmach, "M")) { - rmach = emin; - } else if (lsame_(cmach, "U")) { - rmach = rmin; - } else if (lsame_(cmach, "L")) { - rmach = emax; - } else if (lsame_(cmach, "O")) { - rmach = rmax; - } + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,MAX( M, N ) ). - ret_val = rmach; - return ret_val; + BX (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) + On exit, the result of applying the left or right singular + vector matrix to B. -/* End of SLAMCH */ + LDBX (input) INTEGER + The leading dimension of BX. -} /* slamch_ */ + U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). + On entry, U contains the left singular vector matrices of all + subproblems at the bottom level. + LDU (input) INTEGER, LDU = > N. + The leading dimension of arrays U, VT, DIFL, DIFR, + POLES, GIVNUM, and Z. -/* *********************************************************************** */ + VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). + On entry, VT' contains the right singular vector matrices of + all subproblems at the bottom level. -/* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical - *ieee1) -{ - /* Initialized data */ + K (input) INTEGER array, dimension ( N ). - static logical first = TRUE_; + DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). + where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. 
- /* System generated locals */ - real r__1, r__2; + DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record + distances between singular values on the I-th level and + singular values on the (I -1)-th level, and DIFR(*, 2 * I) + record the normalizing factors of the right singular vectors + matrices of subproblems on I-th level. - /* Local variables */ - static real a, b, c__, f, t1, t2; - static integer lt; - static real one, qtr; - static logical lrnd; - static integer lbeta; - static real savec; - static logical lieee1; - extern doublereal slamc3_(real *, real *); + Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). + On entry, Z(1, I) contains the components of the deflation- + adjusted updating row vector for subproblems on the I-th + level. + POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old + singular values involved in the secular equations on the I-th + level. -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + GIVPTR (input) INTEGER array, dimension ( N ). + On entry, GIVPTR( I ) records the number of Givens + rotations performed on the I-th problem on the computation + tree. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). + On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the + locations of Givens rotations performed on the I-th level on + the computation tree. + LDGCOL (input) INTEGER, LDGCOL = > N. + The leading dimension of arrays GIVCOL and PERM. - Purpose - ======= + PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). + On entry, PERM(*, I) records permutations done on the I-th + level of the computation tree. - SLAMC1 determines the machine parameters given by BETA, T, RND, and - IEEE1. 
+ GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- + values of Givens rotations performed on the I-th level on the + computation tree. - Arguments - ========= + C (input) DOUBLE PRECISION array, dimension ( N ). + On entry, if the I-th subproblem is not square, + C( I ) contains the C-value of a Givens rotation related to + the right null space of the I-th subproblem. - BETA (output) INTEGER - The base of the machine. + S (input) DOUBLE PRECISION array, dimension ( N ). + On entry, if the I-th subproblem is not square, + S( I ) contains the S-value of a Givens rotation related to + the right null space of the I-th subproblem. - T (output) INTEGER - The number of ( BETA ) digits in the mantissa. + WORK (workspace) DOUBLE PRECISION array. + The dimension must be at least N. - RND (output) LOGICAL - Specifies whether proper rounding ( RND = .TRUE. ) or - chopping ( RND = .FALSE. ) occurs in addition. This may not - be a reliable guide to the way in which the machine performs - its arithmetic. + IWORK (workspace) INTEGER array. + The dimension must be at least 3 * N - IEEE1 (output) LOGICAL - Specifies whether rounding appears to be done in the IEEE - 'round to nearest' style. + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== - The routine is based on the routine ENVRON by Malcolm and - incorporates suggestions by Gentleman and Marovich. See + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < *smlsiz) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < *n) { + *info = -6; + } else if (*ldbx < *n) { + *info = -8; + } else if (*ldu < *n) { + *info = -10; + } else if (*ldgcol < *n) { + *info = -19; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLALSA", &i__1); + return 0; + } + +/* Book-keeping and setting up the computation tree. */ - Malcolm M. A. (1972) Algorithms to reveal properties of - floating-point arithmetic. Comms. of the ACM, 15, 949-951. + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; - Gentleman W. M. and Marovich S. B. (1974) More on algorithms - that reveal properties of floating point arithmetic units. - Comms. of the ACM, 17, 276-277. + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); - ===================================================================== +/* + The following code applies back the left singular vector factors. 
+ For applying back the right singular vector factors, go to 50. */ - - if (first) { - first = FALSE_; - one = 1.f; + if (*icompq == 1) { + goto L50; + } /* - LBETA, LIEEE1, LT and LRND are the local values of BETA, - IEEE1, T and RND. - - Throughout this routine we use the function SLAMC3 to ensure - that relevant values are stored and not held in registers, or - are not affected by optimizers. - - Compute a = 2.0**m with the smallest positive integer m such - that - - fl( a + 1.0 ) = a. + The nodes on the bottom level of the tree were solved + by DLASDQ. The corresponding left and right singular vector + matrices are in explicit form. First apply back the left + singular vector matrices. */ - a = 1.f; - c__ = 1.f; + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { -/* + WHILE( C.EQ.ONE )LOOP */ -L10: - if (c__ == one) { - a *= 2; - c__ = slamc3_(&a, &one); - r__1 = -a; - c__ = slamc3_(&c__, &r__1); - goto L10; - } /* - + END WHILE + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ - Now compute b = 2.0**m with the smallest positive integer m - such that + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + dgemm_("T", "N", &nl, nrhs, &nl, &c_b15, &u[nlf + u_dim1], ldu, &b[ + nlf + b_dim1], ldb, &c_b29, &bx[nlf + bx_dim1], ldbx); + dgemm_("T", "N", &nr, nrhs, &nr, &c_b15, &u[nrf + u_dim1], ldu, &b[ + nrf + b_dim1], ldb, &c_b29, &bx[nrf + bx_dim1], ldbx); +/* L10: */ + } - fl( a + b ) .gt. a. +/* + Next copy the rows of B that correspond to unchanged rows + in the bidiagonal matrix to BX. 
*/ - b = 1.f; - c__ = slamc3_(&a, &b); + i__1 = nd; + for (i__ = 1; i__ <= i__1; ++i__) { + ic = iwork[inode + i__ - 1]; + dcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); +/* L20: */ + } -/* + WHILE( C.EQ.A )LOOP */ -L20: - if (c__ == a) { - b *= 2; - c__ = slamc3_(&a, &b); - goto L20; - } /* - + END WHILE - - Now compute the base. a and c are neighbouring floating point - numbers in the interval ( beta**t, beta**( t + 1 ) ) and so - their difference is beta. Adding 0.25 to c is to ensure that it - is truncated to beta and not ( beta - 1 ). + Finally go through the left singular vector matrices of all + the other subproblems bottom-up on the tree. */ - qtr = one / 4; - savec = c__; - r__1 = -a; - c__ = slamc3_(&c__, &r__1); - lbeta = c__ + qtr; + j = pow_ii(&c__2, &nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; /* - Now determine whether rounding or chopping occurs, by adding a - bit less than beta/2 and a bit more than beta/2 to a. + find the first node LF and last node LL on + the current level LVL */ - b = (real) lbeta; - r__1 = b / 2; - r__2 = -b / 100; - f = slamc3_(&r__1, &r__2); - c__ = slamc3_(&f, &a); - if (c__ == a) { - lrnd = TRUE_; + if (lvl == 1) { + lf = 1; + ll = 1; } else { - lrnd = FALSE_; + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; } - r__1 = b / 2; - r__2 = b / 100; - f = slamc3_(&r__1, &r__2); - c__ = slamc3_(&f, &a); - if (lrnd && c__ == a) { - lrnd = FALSE_; + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + --j; + dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & + b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf 
+ lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L30: */ } +/* L40: */ + } + goto L90; + +/* ICOMPQ = 1: applying back the right singular vector factors. */ + +L50: /* - Try and decide whether rounding is done in the IEEE 'round to - nearest' style. B/2 is half a unit in the last place of the two - numbers A and SAVEC. Furthermore, A is even, i.e. has last bit - zero, and SAVEC is odd. Thus adding B/2 to A should not change - A, but adding B/2 to SAVEC should change SAVEC. + First now go through the right singular vector matrices of all + the tree nodes top-down. */ - r__1 = b / 2; - t1 = slamc3_(&r__1, &a); - r__1 = b / 2; - t2 = slamc3_(&r__1, &savec); - lieee1 = t1 == a && t2 > savec && lrnd; + j = 0; + i__1 = nlvl; + for (lvl = 1; lvl <= i__1; ++lvl) { + lvl2 = (lvl << 1) - 1; /* - Now find the mantissa, t. It should be the integer part of - log to the base beta of a, however it is safer to determine t - by powering. So we find t as the smallest positive integer for - which - - fl( beta**t + 1.0 ) = 1.0. + Find the first node LF and last node LL on + the current level LVL. 
*/ - lt = 0; - a = 1.f; - c__ = 1.f; - -/* + WHILE( C.EQ.ONE )LOOP */ -L30: - if (c__ == one) { - ++lt; - a *= lbeta; - c__ = slamc3_(&a, &one); - r__1 = -a; - c__ = slamc3_(&c__, &r__1); - goto L30; + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__2 = lvl - 1; + lf = pow_ii(&c__2, &i__2); + ll = (lf << 1) - 1; } -/* + END WHILE */ - + i__2 = lf; + for (i__ = ll; i__ >= i__2; --i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqre = 0; + } else { + sqre = 1; + } + ++j; + dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ + nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L60: */ + } +/* L70: */ } - *beta = lbeta; - *t = lt; - *rnd = lrnd; - *ieee1 = lieee1; - return 0; - -/* End of SLAMC1 */ - -} /* slamc1_ */ +/* + The nodes on the bottom level of the tree were solved + by DLASDQ. The corresponding right singular vector + matrices are in explicit form. Apply them back. 
+*/ + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlp1 = nl + 1; + if (i__ == nd) { + nrp1 = nr; + } else { + nrp1 = nr + 1; + } + nlf = ic - nl; + nrf = ic + 1; + dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b15, &vt[nlf + vt_dim1], ldu, + &b[nlf + b_dim1], ldb, &c_b29, &bx[nlf + bx_dim1], ldbx); + dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b15, &vt[nrf + vt_dim1], ldu, + &b[nrf + b_dim1], ldb, &c_b29, &bx[nrf + bx_dim1], ldbx); +/* L80: */ + } -/* *********************************************************************** */ +L90: -/* Subroutine */ int slamc2_(integer *beta, integer *t, logical *rnd, real * - eps, integer *emin, real *rmin, integer *emax, real *rmax) -{ - /* Initialized data */ + return 0; - static logical first = TRUE_; - static logical iwarn = FALSE_; +/* End of DLALSA */ - /* Format strings */ - static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre" - "ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va" - "lue EMIN looks\002,\002 acceptable please comment out \002,/\002" - " the IF block as marked within the code of routine\002,\002 SLAM" - "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)"; +} /* dlalsa_ */ +/* Subroutine */ int dlalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, + doublereal *rcond, integer *rank, doublereal *work, integer *iwork, + integer *info) +{ /* System generated locals */ - integer i__1; - real r__1, r__2, r__3, r__4, r__5; + integer b_dim1, b_offset, i__1, i__2; + doublereal d__1; /* Builtin functions */ - double pow_ri(real *, integer *); - integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); + double log(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ - static real a, b, c__; - static integer i__, lt; - static real one, two; - static logical 
ieee; - static real half; - static logical lrnd; - static real leps, zero; - static integer lbeta; - static real rbase; - static integer lemin, lemax, gnmin; - static real small; - static integer gpmin; - static real third, lrmin, lrmax, sixth; - static logical lieee1; - extern /* Subroutine */ int slamc1_(integer *, integer *, logical *, - logical *); - extern doublereal slamc3_(real *, real *); - extern /* Subroutine */ int slamc4_(integer *, real *, integer *), - slamc5_(integer *, integer *, integer *, logical *, integer *, - real *); - static integer ngnmin, ngpmin; - - /* Fortran I/O blocks */ - static cilist io___3081 = { 0, 6, 0, fmt_9999, 0 }; + static integer c__, i__, j, k; + static doublereal r__; + static integer s, u, z__; + static doublereal cs; + static integer bx; + static doublereal sn; + static integer st, vt, nm1, st1; + static doublereal eps; + static integer iwk; + static doublereal tol; + static integer difl, difr, perm, nsub; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer nlvl, sqre, bxst; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer + *); + static integer poles, sizei, nsize, nwork, icmpq1, icmpq2; + + extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), dlalsa_(integer *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + 
doublereal *, doublereal *, integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *); + static integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static doublereal orgnrm; + static integer givnum, givptr, smlszp; /* - -- LAPACK auxiliary routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + October 31, 1999 Purpose ======= - SLAMC2 determines the machine parameters specified in its argument - list. + DLALSD uses the singular value decomposition of A to solve the least + squares problem of finding X to minimize the Euclidean norm of each + column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + are N-by-NRHS. The solution X overwrites B. + + The singular values of A smaller than RCOND times the largest + singular value are treated as zero in solving the least squares + problem; in this case a minimum norm solution is returned. + The actual singular values are returned in D in ascending order. 
+ + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. Arguments ========= - BETA (output) INTEGER - The base of the machine. + UPLO (input) CHARACTER*1 + = 'U': D and E define an upper bidiagonal matrix. + = 'L': D and E define a lower bidiagonal matrix. - T (output) INTEGER - The number of ( BETA ) digits in the mantissa. + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. - RND (output) LOGICAL - Specifies whether proper rounding ( RND = .TRUE. ) or - chopping ( RND = .FALSE. ) occurs in addition. This may not - be a reliable guide to the way in which the machine performs - its arithmetic. + N (input) INTEGER + The dimension of the bidiagonal matrix. N >= 0. - EPS (output) REAL - The smallest positive number such that + NRHS (input) INTEGER + The number of columns of B. NRHS must be at least 1. - fl( 1.0 - EPS ) .LT. 1.0, + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry D contains the main diagonal of the bidiagonal + matrix. On exit, if INFO = 0, D contains its singular values. - where fl denotes the computed value. + E (input) DOUBLE PRECISION array, dimension (N-1) + Contains the super-diagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. - EMIN (output) INTEGER - The minimum exponent before (gradual) underflow occurs. + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On input, B contains the right hand sides of the least + squares problem. On output, B contains the solution X. - RMIN (output) REAL - The smallest normalized number for the machine, given by - BASE**( EMIN - 1 ), where BASE is the floating point value - of BETA. 
+ LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,N). - EMAX (output) INTEGER - The maximum exponent before overflow occurs. + RCOND (input) DOUBLE PRECISION + The singular values of A less than or equal to RCOND times + the largest singular value are treated as zero in solving + the least squares problem. If RCOND is negative, + machine precision is used instead. + For example, if diag(S)*X=B were the least squares problem, + where diag(S) is a diagonal matrix of singular values, the + solution would be X(i) = B(i) / S(i) if S(i) is greater than + RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to + RCOND*max(S). - RMAX (output) REAL - The largest positive number for the machine, given by - BASE**EMAX * ( 1 - EPS ), where BASE is the floating point - value of BETA. + RANK (output) INTEGER + The number of singular values of A greater than RCOND times + the largest singular value. - Further Details - =============== + WORK (workspace) DOUBLE PRECISION array, dimension at least + (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), + where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). - The computation of EPS is based on a routine PARANOIA by - W. Kahan of the University of California at Berkeley. + IWORK (workspace) INTEGER array, dimension at least + (3*N*NLVL + 11*N) - ===================================================================== -*/ + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an singular value while + working on the submatrix lying in rows and columns + INFO/(N+1) through MOD(INFO,N+1). 
+ Further Details + =============== - if (first) { - first = FALSE_; - zero = 0.f; - one = 1.f; - two = 2.f; + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA -/* - LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of - BETA, T, RND, EPS, EMIN and RMIN. + ===================================================================== - Throughout this routine we use the function SLAMC3 to ensure - that relevant values are stored and not held in registers, or - are not affected by optimizers. - SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. + Test the input parameters. */ - slamc1_(&lbeta, <, &lrnd, &lieee1); - -/* Start to find EPS. */ - - b = (real) lbeta; - i__1 = -lt; - a = pow_ri(&b, &i__1); - leps = a; + /* Parameter adjustments */ + --d__; + --e; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + --iwork; -/* Try some tricks to see whether or not this is the correct EPS. 
*/ + /* Function Body */ + *info = 0; - b = two / 3; - half = one / 2; - r__1 = -half; - sixth = slamc3_(&b, &r__1); - third = slamc3_(&sixth, &sixth); - r__1 = -half; - b = slamc3_(&third, &r__1); - b = slamc3_(&b, &sixth); - b = dabs(b); - if (b < leps) { - b = leps; - } + if (*n < 0) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < 1 || *ldb < *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLALSD", &i__1); + return 0; + } - leps = 1.f; + eps = EPSILON; -/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ -L10: - if (leps > b && b > zero) { - leps = b; - r__1 = half * leps; -/* Computing 5th power */ - r__3 = two, r__4 = r__3, r__3 *= r__3; -/* Computing 2nd power */ - r__5 = leps; - r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); - c__ = slamc3_(&r__1, &r__2); - r__1 = -c__; - c__ = slamc3_(&half, &r__1); - b = slamc3_(&half, &c__); - r__1 = -b; - c__ = slamc3_(&half, &r__1); - b = slamc3_(&half, &c__); - goto L10; - } -/* + END WHILE */ +/* Set up the tolerance. */ - if (a < leps) { - leps = a; - } + if (*rcond <= 0. || *rcond >= 1.) { + *rcond = eps; + } -/* - Computation of EPS complete. + *rank = 0; - Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). - Keep dividing A by BETA until (gradual) underflow occurs. This - is detected when we cannot recover the previous A. -*/ +/* Quick return if possible. */ - rbase = one / lbeta; - small = one; - for (i__ = 1; i__ <= 3; ++i__) { - r__1 = small * rbase; - small = slamc3_(&r__1, &zero); -/* L20: */ + if (*n == 0) { + return 0; + } else if (*n == 1) { + if (d__[1] == 0.) 
{ + dlaset_("A", &c__1, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); + } else { + *rank = 1; + dlascl_("G", &c__0, &c__0, &d__[1], &c_b15, &c__1, nrhs, &b[ + b_offset], ldb, info); + d__[1] = abs(d__[1]); } - a = slamc3_(&one, &small); - slamc4_(&ngpmin, &one, &lbeta); - r__1 = -one; - slamc4_(&ngnmin, &r__1, &lbeta); - slamc4_(&gpmin, &a, &lbeta); - r__1 = -a; - slamc4_(&gnmin, &r__1, &lbeta); - ieee = FALSE_; - - if (ngpmin == ngnmin && gpmin == gnmin) { - if (ngpmin == gpmin) { - lemin = ngpmin; -/* - ( Non twos-complement machines, no gradual underflow; - e.g., VAX ) -*/ - } else if (gpmin - ngpmin == 3) { - lemin = ngpmin - 1 + lt; - ieee = TRUE_; -/* - ( Non twos-complement machines, with gradual underflow; - e.g., IEEE standard followers ) -*/ - } else { - lemin = min(ngpmin,gpmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } + return 0; + } - } else if (ngpmin == gpmin && ngnmin == gnmin) { - if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { - lemin = max(ngpmin,ngnmin); -/* - ( Twos-complement machines, no gradual underflow; - e.g., CYBER 205 ) -*/ - } else { - lemin = min(ngpmin,ngnmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } +/* Rotate the matrix if it is lower bidiagonal. 
*/ - } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) - { - if (gpmin - min(ngpmin,ngnmin) == 3) { - lemin = max(ngpmin,ngnmin) - 1 + lt; -/* - ( Twos-complement machines with gradual underflow; - no known machine ) -*/ + if (*(unsigned char *)uplo == 'L') { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (*nrhs == 1) { + drot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & + c__1, &cs, &sn); } else { - lemin = min(ngpmin,ngnmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; + work[(i__ << 1) - 1] = cs; + work[i__ * 2] = sn; } - - } else { -/* Computing MIN */ - i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); - lemin = min(i__1,gnmin); -/* ( A guess; no known machine ) */ - iwarn = TRUE_; - } -/* - ** - Comment out this if block if EMIN is ok -*/ - if (iwarn) { - first = TRUE_; - s_wsfe(&io___3081); - do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer)); - e_wsfe(); +/* L10: */ } -/* - ** - - Assume IEEE arithmetic if we found denormalised numbers above, - or if arithmetic seems to round in the IEEE style, determined - in routine SLAMC1. A true IEEE machine should have both things - true; however, faulty machines may have one or the other. -*/ - - ieee = (ieee) || (lieee1); - -/* - Compute RMIN by successive division by BETA. We could compute - RMIN as BASE**( EMIN - 1 ), but some machines underflow during - this computation. 
-*/ - - lrmin = 1.f; - i__1 = 1 - lemin; - for (i__ = 1; i__ <= i__1; ++i__) { - r__1 = lrmin * rbase; - lrmin = slamc3_(&r__1, &zero); + if (*nrhs > 1) { + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n - 1; + for (j = 1; j <= i__2; ++j) { + cs = work[(j << 1) - 1]; + sn = work[j * 2]; + drot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ * + b_dim1], &c__1, &cs, &sn); +/* L20: */ + } /* L30: */ + } } - -/* Finally, call SLAMC5 to compute EMAX and RMAX. */ - - slamc5_(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); } - *beta = lbeta; - *t = lt; - *rnd = lrnd; - *eps = leps; - *emin = lemin; - *rmin = lrmin; - *emax = lemax; - *rmax = lrmax; - - return 0; - - -/* End of SLAMC2 */ - -} /* slamc2_ */ - - -/* *********************************************************************** */ - -doublereal slamc3_(real *a, real *b) -{ - /* System generated locals */ - real ret_val; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLAMC3 is intended to force A and B to be stored prior to doing - the addition of A and B , for use in situations where optimizers - might hold one of these in a register. - - Arguments - ========= - - A, B (input) REAL - The values A and B. - - ===================================================================== -*/ - - - ret_val = *a + *b; - - return ret_val; - -/* End of SLAMC3 */ - -} /* slamc3_ */ - - -/* *********************************************************************** */ - -/* Subroutine */ int slamc4_(integer *emin, real *start, integer *base) -{ - /* System generated locals */ - integer i__1; - real r__1; +/* Scale. 
*/ - /* Local variables */ - static real a; - static integer i__; - static real b1, b2, c1, c2, d1, d2, one, zero, rbase; - extern doublereal slamc3_(real *, real *); + nm1 = *n - 1; + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + dlaset_("A", n, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); + return 0; + } + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, + info); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLAMC4 is a service routine for SLAMC2. - - Arguments - ========= - - EMIN (output) EMIN - The minimum exponent before (gradual) underflow, computed by - setting A = START and dividing by BASE until the previous A - can not be recovered. - - START (input) REAL - The starting point for determining EMIN. - - BASE (input) INTEGER - The base of the machine. - - ===================================================================== + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. */ - - a = *start; - one = 1.f; - rbase = one / *base; - zero = 0.f; - *emin = 1; - r__1 = a * rbase; - b1 = slamc3_(&r__1, &zero); - c1 = a; - c2 = a; - d1 = a; - d2 = a; -/* - + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. 
- $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP -*/ -L10: - if (c1 == a && c2 == a && d1 == a && d2 == a) { - --(*emin); - a = b1; - r__1 = a / *base; - b1 = slamc3_(&r__1, &zero); - r__1 = b1 * *base; - c1 = slamc3_(&r__1, &zero); - d1 = zero; - i__1 = *base; - for (i__ = 1; i__ <= i__1; ++i__) { - d1 += b1; -/* L20: */ + if (*n <= *smlsiz) { + nwork = *n * *n + 1; + dlaset_("A", n, n, &c_b29, &c_b15, &work[1], n); + dlasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, & + work[1], n, &b[b_offset], ldb, &work[nwork], info); + if (*info != 0) { + return 0; } - r__1 = a * rbase; - b2 = slamc3_(&r__1, &zero); - r__1 = b2 / rbase; - c2 = slamc3_(&r__1, &zero); - d2 = zero; - i__1 = *base; + tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - d2 += b2; -/* L30: */ - } - goto L10; - } -/* + END WHILE */ - - return 0; - -/* End of SLAMC4 */ - -} /* slamc4_ */ - - -/* *********************************************************************** */ - -/* Subroutine */ int slamc5_(integer *beta, integer *p, integer *emin, - logical *ieee, integer *emax, real *rmax) -{ - /* System generated locals */ - integer i__1; - real r__1; + if (d__[i__] <= tol) { + dlaset_("A", &c__1, nrhs, &c_b29, &c_b29, &b[i__ + b_dim1], + ldb); + } else { + dlascl_("G", &c__0, &c__0, &d__[i__], &c_b15, &c__1, nrhs, &b[ + i__ + b_dim1], ldb, info); + ++(*rank); + } +/* L40: */ + } + dgemm_("T", "N", n, nrhs, n, &c_b15, &work[1], n, &b[b_offset], ldb, & + c_b29, &work[nwork], n); + dlacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb); - /* Local variables */ - static integer i__; - static real y, z__; - static integer try__, lexp; - static real oldy; - static integer uexp, nbits; - extern doublereal slamc3_(real *, real *); - static real recbas; - static integer exbits, expsum; +/* Unscale. 
*/ + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, + info); + dlasrt_("D", n, &d__[1], info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, nrhs, &b[b_offset], + ldb, info); -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + return 0; + } +/* Book-keeping and setting up some constants. */ - Purpose - ======= + nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) / + log(2.)) + 1; - SLAMC5 attempts to compute RMAX, the largest machine floating-point - number, without overflow. It assumes that EMAX + abs(EMIN) sum - approximately to a power of 2. It will fail on machines where this - assumption does not hold, for example, the Cyber 205 (EMIN = -28625, - EMAX = 28718). It will also fail if the value supplied for EMIN is - too large (i.e. too close to zero), probably with overflow. + smlszp = *smlsiz + 1; - Arguments - ========= + u = 1; + vt = *smlsiz * *n + 1; + difl = vt + smlszp * *n; + difr = difl + nlvl * *n; + z__ = difr + (nlvl * *n << 1); + c__ = z__ + nlvl * *n; + s = c__ + *n; + poles = s + *n; + givnum = poles + (nlvl << 1) * *n; + bx = givnum + (nlvl << 1) * *n; + nwork = bx + *n * *nrhs; - BETA (input) INTEGER - The base of floating-point arithmetic. + sizei = *n + 1; + k = sizei + *n; + givptr = k + *n; + perm = givptr + *n; + givcol = perm + nlvl * *n; + iwk = givcol + (nlvl * *n << 1); - P (input) INTEGER - The number of base BETA digits in the mantissa of a - floating-point value. + st = 1; + sqre = 0; + icmpq1 = 1; + icmpq2 = 0; + nsub = 0; - EMIN (input) INTEGER - The minimum exponent before (gradual) underflow. 
+ i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L50: */ + } - IEEE (input) LOGICAL - A logical flag specifying whether or not the arithmetic - system is thought to comply with the IEEE standard. + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + ++nsub; + iwork[nsub] = st; - EMAX (output) INTEGER - The largest exponent before overflow +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ - RMAX (output) REAL - The largest machine floating-point number. + if (i__ < nm1) { - ===================================================================== +/* A subproblem with E(I) small for I < NM1. */ + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { - First compute LEXP and UEXP, two powers of 2 that bound - abs(EMIN). We then assume that EMAX + abs(EMIN) will sum - approximately to the bound that is closest to abs(EMIN). - (EMAX is the exponent of the required number RMAX). -*/ +/* A subproblem with E(NM1) not too small but I = NM1. */ - lexp = 1; - exbits = 1; -L10: - try__ = (lexp) << (1); - if (try__ <= -(*emin)) { - lexp = try__; - ++exbits; - goto L10; - } - if (lexp == -(*emin)) { - uexp = lexp; - } else { - uexp = try__; - ++exbits; - } + nsize = *n - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else { /* - Now -LEXP is less than or equal to EMIN, and -UEXP is greater - than or equal to EMIN. EXBITS is the number of bits needed to - store the exponent. + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N), which is not solved + explicitly. 
*/ - if (uexp + *emin > -lexp - *emin) { - expsum = (lexp) << (1); - } else { - expsum = (uexp) << (1); - } + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + ++nsub; + iwork[nsub] = *n; + iwork[sizei + nsub - 1] = 1; + dcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); + } + st1 = st - 1; + if (nsize == 1) { /* - EXPSUM is the exponent range, approximately equal to - EMAX - EMIN + 1 . + This is a 1-by-1 subproblem and is not solved + explicitly. */ - *emax = expsum + *emin - 1; - nbits = exbits + 1 + *p; + dcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); + } else if (nsize <= *smlsiz) { -/* - NBITS is the total number of bits needed to store a - floating-point number. -*/ +/* This is a small subproblem and is solved by DLASDQ. */ - if (nbits % 2 == 1 && *beta == 2) { + dlaset_("A", &nsize, &nsize, &c_b29, &c_b15, &work[vt + st1], + n); + dlasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[ + st], &work[vt + st1], n, &work[nwork], n, &b[st + + b_dim1], ldb, &work[nwork], info); + if (*info != 0) { + return 0; + } + dlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + + st1], n); + } else { -/* - Either there are an odd number of bits used to store a - floating-point number, which is unlikely, or some bits are - not used in the representation of numbers, which is possible, - (e.g. Cray machines) or the mantissa has an implicit bit, - (e.g. IEEE machines, Dec Vax machines), which is perhaps the - most likely. We have to assume the last alternative. - If this is true, then we need to reduce EMAX by one because - there must be some way of representing zero in an implicit-bit - system. On machines like Cray, we are reducing EMAX by one - unnecessarily. -*/ +/* A large problem. Solve it using divide and conquer. 
*/ - --(*emax); + dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & + work[u + st1], n, &work[vt + st1], &iwork[k + st1], & + work[difl + st1], &work[difr + st1], &work[z__ + st1], + &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + + st1], &work[c__ + st1], &work[s + st1], &work[nwork], + &iwork[iwk], info); + if (*info != 0) { + return 0; + } + bxst = bx + st1; + dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & + work[bxst], n, &work[u + st1], n, &work[vt + st1], & + iwork[k + st1], &work[difl + st1], &work[difr + st1], + &work[z__ + st1], &work[poles + st1], &iwork[givptr + + st1], &iwork[givcol + st1], n, &iwork[perm + st1], & + work[givnum + st1], &work[c__ + st1], &work[s + st1], + &work[nwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + } + st = i__ + 1; + } +/* L60: */ } - if (*ieee) { +/* Apply the singular values and treat the tiny ones as zero. */ -/* - Assume we are on an IEEE machine which reserves one exponent - for infinity and NaN. -*/ + tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); - --(*emax); - } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { /* - Now create RMAX, the largest machine number, which should - be equal to (1.0 - BETA**(-P)) * BETA**EMAX . - - First compute 1.0 - BETA**(-P), being careful that the - result is less than 1.0 . + Some of the elements in D can be negative because 1-by-1 + subproblems were not solved explicitly. 
*/ - recbas = 1.f / *beta; - z__ = *beta - 1.f; - y = 0.f; - i__1 = *p; - for (i__ = 1; i__ <= i__1; ++i__) { - z__ *= recbas; - if (y < 1.f) { - oldy = y; + if ((d__1 = d__[i__], abs(d__1)) <= tol) { + dlaset_("A", &c__1, nrhs, &c_b29, &c_b29, &work[bx + i__ - 1], n); + } else { + ++(*rank); + dlascl_("G", &c__0, &c__0, &d__[i__], &c_b15, &c__1, nrhs, &work[ + bx + i__ - 1], n, info); } - y = slamc3_(&y, &z__); -/* L20: */ - } - if (y >= 1.f) { - y = oldy; + d__[i__] = (d__1 = d__[i__], abs(d__1)); +/* L70: */ } -/* Now multiply by BETA**EMAX to get RMAX. */ +/* Now apply back the right singular vectors. */ - i__1 = *emax; + icmpq2 = 1; + i__1 = nsub; for (i__ = 1; i__ <= i__1; ++i__) { - r__1 = y * *beta; - y = slamc3_(&r__1, &c_b320); -/* L30: */ + st = iwork[i__]; + st1 = st - 1; + nsize = iwork[sizei + i__ - 1]; + bxst = bx + st1; + if (nsize == 1) { + dcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); + } else if (nsize <= *smlsiz) { + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b15, &work[vt + st1], n, + &work[bxst], n, &c_b29, &b[st + b_dim1], ldb); + } else { + dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + + b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[ + k + st1], &work[difl + st1], &work[difr + st1], &work[z__ + + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], + &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[ + iwk], info); + if (*info != 0) { + return 0; + } + } +/* L80: */ } - *rmax = y; +/* Unscale and sort the singular values. 
*/ + + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, info); + dlasrt_("D", n, &d__[1], info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, nrhs, &b[b_offset], ldb, + info); + return 0; -/* End of SLAMC5 */ +/* End of DLALSD */ -} /* slamc5_ */ +} /* dlalsd_ */ -/* Subroutine */ int slamrg_(integer *n1, integer *n2, real *a, integer * - strd1, integer *strd2, integer *index) +/* Subroutine */ int dlamrg_(integer *n1, integer *n2, doublereal *a, integer + *dtrd1, integer *dtrd2, integer *index) { /* System generated locals */ integer i__1; @@ -80566,7 +15835,7 @@ L10: Purpose ======= - SLAMRG will create a permutation list which will merge the elements + DLAMRG will create a permutation list which will merge the elements of A (which is composed of two independently sorted sets) into a single set which is sorted in ascending order. @@ -80578,17 +15847,17 @@ L10: These arguements contain the respective lengths of the two sorted lists to be merged. - A (input) REAL array, dimension (N1+N2) + A (input) DOUBLE PRECISION array, dimension (N1+N2) The first N1 elements of A contain a list of numbers which are sorted in either ascending or descending order. Likewise for the final N2 elements. - STRD1 (input) INTEGER - STRD2 (input) INTEGER + DTRD1 (input) INTEGER + DTRD2 (input) INTEGER These are the strides to be taken through the array A. Allowable strides are 1 and -1. They indicate whether a - subset of A is sorted in ascending (STRDx = 1) or descending - (STRDx = -1) order. + subset of A is sorted in ascending (DTRDx = 1) or descending + (DTRDx = -1) order. 
INDEX (output) INTEGER array, dimension (N1+N2) On exit this array will contain a permutation such that @@ -80606,12 +15875,12 @@ L10: /* Function Body */ n1sv = *n1; n2sv = *n2; - if (*strd1 > 0) { + if (*dtrd1 > 0) { ind1 = 1; } else { ind1 = *n1; } - if (*strd2 > 0) { + if (*dtrd2 > 0) { ind2 = *n1 + 1; } else { ind2 = *n1 + *n2; @@ -80623,12 +15892,12 @@ L10: if (a[ind1] <= a[ind2]) { index[i__] = ind1; ++i__; - ind1 += *strd1; + ind1 += *dtrd1; --n1sv; } else { index[i__] = ind2; ++i__; - ind2 += *strd2; + ind2 += *dtrd2; --n2sv; } goto L10; @@ -80639,7 +15908,7 @@ L10: for (n1sv = 1; n1sv <= i__1; ++n1sv) { index[i__] = ind2; ++i__; - ind2 += *strd2; + ind2 += *dtrd2; /* L20: */ } } else { @@ -80648,34 +15917,34 @@ L10: for (n2sv = 1; n2sv <= i__1; ++n2sv) { index[i__] = ind1; ++i__; - ind1 += *strd1; + ind1 += *dtrd1; /* L30: */ } } return 0; -/* End of SLAMRG */ +/* End of DLAMRG */ -} /* slamrg_ */ +} /* dlamrg_ */ -doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, - real *work) +doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer + *lda, doublereal *work) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; - real ret_val, r__1, r__2, r__3; + doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer i__, j; - static real sum, scale; + static doublereal sum, scale; extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, - real *); + static doublereal value; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); /* @@ -80688,16 +15957,16 @@ doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, Purpose ======= - SLANGE returns the value of the one norm, or the Frobenius norm, or + DLANGE returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the 
element of largest absolute value of a real matrix A. Description =========== - SLANGE returns the value + DLANGE returns the value - SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' ( ( norm1(A), NORM = '1', 'O' or 'o' ( @@ -80714,24 +15983,24 @@ doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, ========= NORM (input) CHARACTER*1 - Specifies the value to be returned in SLANGE as described + Specifies the value to be returned in DLANGE as described above. M (input) INTEGER The number of rows of the matrix A. M >= 0. When M = 0, - SLANGE is set to zero. + DLANGE is set to zero. N (input) INTEGER The number of columns of the matrix A. N >= 0. When N = 0, - SLANGE is set to zero. + DLANGE is set to zero. - A (input) REAL array, dimension (LDA,N) + A (input) DOUBLE PRECISION array, dimension (LDA,N) The m by n matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(M,1). - WORK (workspace) REAL array, dimension (LWORK), + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), where LWORK >= M when NORM = 'I'; otherwise, WORK is not referenced. @@ -80747,38 +16016,38 @@ doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, /* Function Body */ if (min(*m,*n) == 0) { - value = 0.f; + value = 0.; } else if (lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ - value = 0.f; + value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ - r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); - value = dmax(r__2,r__3); + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + value = max(d__2,d__3); /* L10: */ } /* L20: */ } - } else if ((lsame_(norm, "O")) || (*(unsigned char * - )norm == '1')) { + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { /* Find norm1(A). 
*/ - value = 0.f; + value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { - sum = 0.f; + sum = 0.; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - sum += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L30: */ } - value = dmax(value,sum); + value = max(value,sum); /* L40: */ } } else if (lsame_(norm, "I")) { @@ -80787,36 +16056,35 @@ doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; + work[i__] = 0.; /* L50: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L60: */ } /* L70: */ } - value = 0.f; + value = 0.; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); /* L80: */ } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { /* Find normF(A). 
*/ - scale = 0.f; - sum = 1.f; + scale = 0.; + sum = 1.; i__1 = *n; for (j = 1; j <= i__1; ++j) { - slassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); + dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); /* L90: */ } value = scale * sqrt(sum); @@ -80825,26 +16093,27 @@ doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, ret_val = value; return ret_val; -/* End of SLANGE */ +/* End of DLANGE */ -} /* slange_ */ +} /* dlange_ */ -doublereal slanhs_(char *norm, integer *n, real *a, integer *lda, real *work) +doublereal dlanhs_(char *norm, integer *n, doublereal *a, integer *lda, + doublereal *work) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - real ret_val, r__1, r__2, r__3; + doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer i__, j; - static real sum, scale; + static doublereal sum, scale; extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, - real *); + static doublereal value; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); /* @@ -80857,16 +16126,16 @@ doublereal slanhs_(char *norm, integer *n, real *a, integer *lda, real *work) Purpose ======= - SLANHS returns the value of the one norm, or the Frobenius norm, or + DLANHS returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a Hessenberg matrix A. 
Description =========== - SLANHS returns the value + DLANHS returns the value - SLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' + DLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' ( ( norm1(A), NORM = '1', 'O' or 'o' ( @@ -80883,21 +16152,21 @@ doublereal slanhs_(char *norm, integer *n, real *a, integer *lda, real *work) ========= NORM (input) CHARACTER*1 - Specifies the value to be returned in SLANHS as described + Specifies the value to be returned in DLANHS as described above. N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, SLANHS is + The order of the matrix A. N >= 0. When N = 0, DLANHS is set to zero. - A (input) REAL array, dimension (LDA,N) + A (input) DOUBLE PRECISION array, dimension (LDA,N) The n by n upper Hessenberg matrix A; the part of A below the first sub-diagonal is not referenced. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(N,1). - WORK (workspace) REAL array, dimension (LWORK), + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), where LWORK >= N when NORM = 'I'; otherwise, WORK is not referenced. @@ -80913,12 +16182,12 @@ doublereal slanhs_(char *norm, integer *n, real *a, integer *lda, real *work) /* Function Body */ if (*n == 0) { - value = 0.f; + value = 0.; } else if (lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ - value = 0.f; + value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ @@ -80926,29 +16195,29 @@ doublereal slanhs_(char *norm, integer *n, real *a, integer *lda, real *work) i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ - r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); - value = dmax(r__2,r__3); + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + value = max(d__2,d__3); /* L10: */ } /* L20: */ } - } else if ((lsame_(norm, "O")) || (*(unsigned char * - )norm == '1')) { + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { /* Find norm1(A). 
*/ - value = 0.f; + value = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { - sum = 0.f; + sum = 0.; /* Computing MIN */ i__3 = *n, i__4 = j + 1; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { - sum += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L30: */ } - value = dmax(value,sum); + value = max(value,sum); /* L40: */ } } else if (lsame_(norm, "I")) { @@ -80957,7 +16226,7 @@ doublereal slanhs_(char *norm, integer *n, real *a, integer *lda, real *work) i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; + work[i__] = 0.; /* L50: */ } i__1 = *n; @@ -80966,32 +16235,31 @@ doublereal slanhs_(char *norm, integer *n, real *a, integer *lda, real *work) i__3 = *n, i__4 = j + 1; i__2 = min(i__3,i__4); for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); /* L60: */ } /* L70: */ } - value = 0.f; + value = 0.; i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); /* L80: */ } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { /* Find normF(A). 
*/ - scale = 0.f; - sum = 1.f; + scale = 0.; + sum = 1.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__3 = *n, i__4 = j + 1; i__2 = min(i__3,i__4); - slassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); + dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); /* L90: */ } value = scale * sqrt(sum); @@ -81000,26 +16268,26 @@ doublereal slanhs_(char *norm, integer *n, real *a, integer *lda, real *work) ret_val = value; return ret_val; -/* End of SLANHS */ +/* End of DLANHS */ -} /* slanhs_ */ +} /* dlanhs_ */ -doublereal slanst_(char *norm, integer *n, real *d__, real *e) +doublereal dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e) { /* System generated locals */ integer i__1; - real ret_val, r__1, r__2, r__3, r__4, r__5; + doublereal ret_val, d__1, d__2, d__3, d__4, d__5; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer i__; - static real sum, scale; + static doublereal sum, scale; extern logical lsame_(char *, char *); - static real anorm; - extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, - real *); + static doublereal anorm; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); /* @@ -81032,16 +16300,16 @@ doublereal slanst_(char *norm, integer *n, real *d__, real *e) Purpose ======= - SLANST returns the value of the one norm, or the Frobenius norm, or + DLANST returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real symmetric tridiagonal matrix A. 
Description =========== - SLANST returns the value + DLANST returns the value - SLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' + DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' ( ( norm1(A), NORM = '1', 'O' or 'o' ( @@ -81058,17 +16326,17 @@ doublereal slanst_(char *norm, integer *n, real *d__, real *e) ========= NORM (input) CHARACTER*1 - Specifies the value to be returned in SLANST as described + Specifies the value to be returned in DLANST as described above. N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, SLANST is + The order of the matrix A. N >= 0. When N = 0, DLANST is set to zero. - D (input) REAL array, dimension (N) + D (input) DOUBLE PRECISION array, dimension (N) The diagonal elements of A. - E (input) REAL array, dimension (N-1) + E (input) DOUBLE PRECISION array, dimension (N-1) The (n-1) sub-diagonal or super-diagonal elements of A. ===================================================================== @@ -81081,83 +16349,82 @@ doublereal slanst_(char *norm, integer *n, real *d__, real *e) /* Function Body */ if (*n <= 0) { - anorm = 0.f; + anorm = 0.; } else if (lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ - anorm = (r__1 = d__[*n], dabs(r__1)); + anorm = (d__1 = d__[*n], abs(d__1)); i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ - r__2 = anorm, r__3 = (r__1 = d__[i__], dabs(r__1)); - anorm = dmax(r__2,r__3); + d__2 = anorm, d__3 = (d__1 = d__[i__], abs(d__1)); + anorm = max(d__2,d__3); /* Computing MAX */ - r__2 = anorm, r__3 = (r__1 = e[i__], dabs(r__1)); - anorm = dmax(r__2,r__3); + d__2 = anorm, d__3 = (d__1 = e[i__], abs(d__1)); + anorm = max(d__2,d__3); /* L10: */ } - } else if (((lsame_(norm, "O")) || (*(unsigned char - *)norm == '1')) || (lsame_(norm, "I"))) { + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1' || lsame_(norm, "I")) { /* Find norm1(A). 
*/ if (*n == 1) { - anorm = dabs(d__[1]); + anorm = abs(d__[1]); } else { /* Computing MAX */ - r__3 = dabs(d__[1]) + dabs(e[1]), r__4 = (r__1 = e[*n - 1], dabs( - r__1)) + (r__2 = d__[*n], dabs(r__2)); - anorm = dmax(r__3,r__4); + d__3 = abs(d__[1]) + abs(e[1]), d__4 = (d__1 = e[*n - 1], abs( + d__1)) + (d__2 = d__[*n], abs(d__2)); + anorm = max(d__3,d__4); i__1 = *n - 1; for (i__ = 2; i__ <= i__1; ++i__) { /* Computing MAX */ - r__4 = anorm, r__5 = (r__1 = d__[i__], dabs(r__1)) + (r__2 = - e[i__], dabs(r__2)) + (r__3 = e[i__ - 1], dabs(r__3)); - anorm = dmax(r__4,r__5); + d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[ + i__], abs(d__2)) + (d__3 = e[i__ - 1], abs(d__3)); + anorm = max(d__4,d__5); /* L20: */ } } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { /* Find normF(A). */ - scale = 0.f; - sum = 1.f; + scale = 0.; + sum = 1.; if (*n > 1) { i__1 = *n - 1; - slassq_(&i__1, &e[1], &c__1, &scale, &sum); + dlassq_(&i__1, &e[1], &c__1, &scale, &sum); sum *= 2; } - slassq_(n, &d__[1], &c__1, &scale, &sum); + dlassq_(n, &d__[1], &c__1, &scale, &sum); anorm = scale * sqrt(sum); } ret_val = anorm; return ret_val; -/* End of SLANST */ +/* End of DLANST */ -} /* slanst_ */ +} /* dlanst_ */ -doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, - real *work) +doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer + *lda, doublereal *work) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; - real ret_val, r__1, r__2, r__3; + doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer i__, j; - static real sum, absa, scale; + static doublereal sum, absa, scale; extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, - real *); + static doublereal value; + extern /* Subroutine */ int 
dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); /* @@ -81170,16 +16437,16 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, Purpose ======= - SLANSY returns the value of the one norm, or the Frobenius norm, or + DLANSY returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real symmetric matrix A. Description =========== - SLANSY returns the value + DLANSY returns the value - SLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' + DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' ( ( norm1(A), NORM = '1', 'O' or 'o' ( @@ -81196,7 +16463,7 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, ========= NORM (input) CHARACTER*1 - Specifies the value to be returned in SLANSY as described + Specifies the value to be returned in DLANSY as described above. UPLO (input) CHARACTER*1 @@ -81206,10 +16473,10 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, = 'L': Lower triangular part of A is referenced N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, SLANSY is + The order of the matrix A. N >= 0. When N = 0, DLANSY is set to zero. - A (input) REAL array, dimension (LDA,N) + A (input) DOUBLE PRECISION array, dimension (LDA,N) The symmetric matrix A. If UPLO = 'U', the leading n by n upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A @@ -81221,7 +16488,7 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, LDA (input) INTEGER The leading dimension of the array A. LDA >= max(N,1). - WORK (workspace) REAL array, dimension (LWORK), + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, WORK is not referenced. 
@@ -81237,21 +16504,21 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, /* Function Body */ if (*n == 0) { - value = 0.f; + value = 0.; } else if (lsame_(norm, "M")) { /* Find max(abs(A(i,j))). */ - value = 0.f; + value = 0.; if (lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ - r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs( - r__1)); - value = dmax(r__2,r__3); + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); /* L10: */ } /* L20: */ @@ -81262,109 +16529,108 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { /* Computing MAX */ - r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs( - r__1)); - value = dmax(r__2,r__3); + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); /* L30: */ } /* L40: */ } } - } else if (((lsame_(norm, "I")) || (lsame_(norm, - "O"))) || (*(unsigned char *)norm == '1')) { + } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { /* Find normI(A) ( = norm1(A), since A is symmetric). 
*/ - value = 0.f; + value = 0.; if (lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - sum = 0.f; + sum = 0.; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - absa = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); sum += absa; work[i__] += absa; /* L50: */ } - work[j] = sum + (r__1 = a[j + j * a_dim1], dabs(r__1)); + work[j] = sum + (d__1 = a[j + j * a_dim1], abs(d__1)); /* L60: */ } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); /* L70: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; + work[i__] = 0.; /* L80: */ } i__1 = *n; for (j = 1; j <= i__1; ++j) { - sum = work[j] + (r__1 = a[j + j * a_dim1], dabs(r__1)); + sum = work[j] + (d__1 = a[j + j * a_dim1], abs(d__1)); i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { - absa = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); sum += absa; work[i__] += absa; /* L90: */ } - value = dmax(value,sum); + value = max(value,sum); /* L100: */ } } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { /* Find normF(A). 
*/ - scale = 0.f; - sum = 1.f; + scale = 0.; + sum = 1.; if (lsame_(uplo, "U")) { i__1 = *n; for (j = 2; j <= i__1; ++j) { i__2 = j - 1; - slassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); + dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); /* L110: */ } } else { i__1 = *n - 1; for (j = 1; j <= i__1; ++j) { i__2 = *n - j; - slassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); + dlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); /* L120: */ } } sum *= 2; i__1 = *lda + 1; - slassq_(n, &a[a_offset], &i__1, &scale, &sum); + dlassq_(n, &a[a_offset], &i__1, &scale, &sum); value = scale * sqrt(sum); } ret_val = value; return ret_val; -/* End of SLANSY */ +/* End of DLANSY */ -} /* slansy_ */ +} /* dlansy_ */ -/* Subroutine */ int slanv2_(real *a, real *b, real *c__, real *d__, real * - rt1r, real *rt1i, real *rt2r, real *rt2i, real *cs, real *sn) +/* Subroutine */ int dlanv2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r, + doublereal *rt2i, doublereal *cs, doublereal *sn) { /* System generated locals */ - real r__1, r__2; + doublereal d__1, d__2; /* Builtin functions */ - double r_sign(real *, real *), sqrt(doublereal); + double d_sign(doublereal *, doublereal *), sqrt(doublereal); /* Local variables */ - static real p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, temp, - scale, bcmax, bcmis, sigma; - extern doublereal slapy2_(real *, real *), slamch_(char *); + static doublereal p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, + temp, scale, bcmax, bcmis, sigma; + /* @@ -81377,7 +16643,7 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, Purpose ======= - SLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric + DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric matrix in standard form: [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] @@ -81391,23 +16657,23 @@ doublereal slansy_(char *norm, char *uplo, 
integer *n, real *a, integer *lda, Arguments ========= - A (input/output) REAL - B (input/output) REAL - C (input/output) REAL - D (input/output) REAL + A (input/output) DOUBLE PRECISION + B (input/output) DOUBLE PRECISION + C (input/output) DOUBLE PRECISION + D (input/output) DOUBLE PRECISION On entry, the elements of the input matrix. On exit, they are overwritten by the elements of the standardised Schur form. - RT1R (output) REAL - RT1I (output) REAL - RT2R (output) REAL - RT2I (output) REAL + RT1R (output) DOUBLE PRECISION + RT1I (output) DOUBLE PRECISION + RT2R (output) DOUBLE PRECISION + RT2I (output) DOUBLE PRECISION The real and imaginary parts of the eigenvalues. If the eigenvalues are a complex conjugate pair, RT1I > 0. - CS (output) REAL - SN (output) REAL + CS (output) DOUBLE PRECISION + SN (output) DOUBLE PRECISION Parameters of the rotation matrix. Further Details @@ -81422,42 +16688,41 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, */ - eps = slamch_("P"); - if (*c__ == 0.f) { - *cs = 1.f; - *sn = 0.f; + eps = PRECISION; + if (*c__ == 0.) { + *cs = 1.; + *sn = 0.; goto L10; - } else if (*b == 0.f) { + } else if (*b == 0.) { /* Swap rows and columns */ - *cs = 0.f; - *sn = 1.f; + *cs = 0.; + *sn = 1.; temp = *d__; *d__ = *a; *a = temp; *b = -(*c__); - *c__ = 0.f; + *c__ = 0.; goto L10; - } else if (*a - *d__ == 0.f && r_sign(&c_b1011, b) != r_sign(&c_b1011, - c__)) { - *cs = 1.f; - *sn = 0.f; + } else if (*a - *d__ == 0. 
&& d_sign(&c_b15, b) != d_sign(&c_b15, c__)) { + *cs = 1.; + *sn = 0.; goto L10; } else { temp = *a - *d__; - p = temp * .5f; + p = temp * .5; /* Computing MAX */ - r__1 = dabs(*b), r__2 = dabs(*c__); - bcmax = dmax(r__1,r__2); + d__1 = abs(*b), d__2 = abs(*c__); + bcmax = max(d__1,d__2); /* Computing MIN */ - r__1 = dabs(*b), r__2 = dabs(*c__); - bcmis = dmin(r__1,r__2) * r_sign(&c_b1011, b) * r_sign(&c_b1011, c__); + d__1 = abs(*b), d__2 = abs(*c__); + bcmis = min(d__1,d__2) * d_sign(&c_b15, b) * d_sign(&c_b15, c__); /* Computing MAX */ - r__1 = dabs(p); - scale = dmax(r__1,bcmax); + d__1 = abs(p); + scale = max(d__1,bcmax); z__ = p / scale * p + bcmax / scale * bcmis; /* @@ -81465,22 +16730,22 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, decision on the nature of eigenvalues */ - if (z__ >= eps * 4.f) { + if (z__ >= eps * 4.) { /* Real eigenvalues. Compute A and D. */ - r__1 = sqrt(scale) * sqrt(z__); - z__ = p + r_sign(&r__1, &p); + d__1 = sqrt(scale) * sqrt(z__); + z__ = p + d_sign(&d__1, &p); *a = *d__ + z__; *d__ -= bcmax / z__ * bcmis; /* Compute B and the rotation matrix */ - tau = slapy2_(c__, &z__); + tau = dlapy2_(c__, &z__); *cs = z__ / tau; *sn = *c__ / tau; *b -= *c__; - *c__ = 0.f; + *c__ = 0.; } else { /* @@ -81489,9 +16754,9 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, */ sigma = *b + *c__; - tau = slapy2_(&sigma, &temp); - *cs = sqrt((dabs(sigma) / tau + 1.f) * .5f); - *sn = -(p / (tau * *cs)) * r_sign(&c_b1011, &sigma); + tau = dlapy2_(&sigma, &temp); + *cs = sqrt((abs(sigma) / tau + 1.) 
* .5); + *sn = -(p / (tau * *cs)) * d_sign(&c_b15, &sigma); /* Compute [ AA BB ] = [ A B ] [ CS -SN ] @@ -81513,25 +16778,25 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, *c__ = -aa * *sn + cc * *cs; *d__ = -bb * *sn + dd * *cs; - temp = (*a + *d__) * .5f; + temp = (*a + *d__) * .5; *a = temp; *d__ = temp; - if (*c__ != 0.f) { - if (*b != 0.f) { - if (r_sign(&c_b1011, b) == r_sign(&c_b1011, c__)) { + if (*c__ != 0.) { + if (*b != 0.) { + if (d_sign(&c_b15, b) == d_sign(&c_b15, c__)) { /* Real eigenvalues: reduce to upper triangular form */ - sab = sqrt((dabs(*b))); - sac = sqrt((dabs(*c__))); - r__1 = sab * sac; - p = r_sign(&r__1, c__); - tau = 1.f / sqrt((r__1 = *b + *c__, dabs(r__1))); + sab = sqrt((abs(*b))); + sac = sqrt((abs(*c__))); + d__1 = sab * sac; + p = d_sign(&d__1, c__); + tau = 1. / sqrt((d__1 = *b + *c__, abs(d__1))); *a = temp + p; *d__ = temp - p; *b -= *c__; - *c__ = 0.f; + *c__ = 0.; cs1 = sab * tau; sn1 = sac * tau; temp = *cs * cs1 - *sn * sn1; @@ -81540,7 +16805,7 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, } } else { *b = -(*c__); - *c__ = 0.f; + *c__ = 0.; temp = *cs; *cs = -(*sn); *sn = temp; @@ -81556,29 +16821,29 @@ L10: *rt1r = *a; *rt2r = *d__; - if (*c__ == 0.f) { - *rt1i = 0.f; - *rt2i = 0.f; + if (*c__ == 0.) 
{ + *rt1i = 0.; + *rt2i = 0.; } else { - *rt1i = sqrt((dabs(*b))) * sqrt((dabs(*c__))); + *rt1i = sqrt((abs(*b))) * sqrt((abs(*c__))); *rt2i = -(*rt1i); } return 0; -/* End of SLANV2 */ +/* End of DLANV2 */ -} /* slanv2_ */ +} /* dlanv2_ */ -doublereal slapy2_(real *x, real *y) +doublereal dlapy2_(doublereal *x, doublereal *y) { /* System generated locals */ - real ret_val, r__1; + doublereal ret_val, d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static real w, z__, xabs, yabs; + static doublereal w, z__, xabs, yabs; /* @@ -81591,47 +16856,47 @@ doublereal slapy2_(real *x, real *y) Purpose ======= - SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary + DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary overflow. Arguments ========= - X (input) REAL - Y (input) REAL + X (input) DOUBLE PRECISION + Y (input) DOUBLE PRECISION X and Y specify the values x and y. ===================================================================== */ - xabs = dabs(*x); - yabs = dabs(*y); - w = dmax(xabs,yabs); - z__ = dmin(xabs,yabs); - if (z__ == 0.f) { + xabs = abs(*x); + yabs = abs(*y); + w = max(xabs,yabs); + z__ = min(xabs,yabs); + if (z__ == 0.) 
{ ret_val = w; } else { /* Computing 2nd power */ - r__1 = z__ / w; - ret_val = w * sqrt(r__1 * r__1 + 1.f); + d__1 = z__ / w; + ret_val = w * sqrt(d__1 * d__1 + 1.); } return ret_val; -/* End of SLAPY2 */ +/* End of DLAPY2 */ -} /* slapy2_ */ +} /* dlapy2_ */ -doublereal slapy3_(real *x, real *y, real *z__) +doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) { /* System generated locals */ - real ret_val, r__1, r__2, r__3; + doublereal ret_val, d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static real w, xabs, yabs, zabs; + static doublereal w, xabs, yabs, zabs; /* @@ -81644,57 +16909,60 @@ doublereal slapy3_(real *x, real *y, real *z__) Purpose ======= - SLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause + DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause unnecessary overflow. Arguments ========= - X (input) REAL - Y (input) REAL - Z (input) REAL + X (input) DOUBLE PRECISION + Y (input) DOUBLE PRECISION + Z (input) DOUBLE PRECISION X, Y and Z specify the values x, y and z. ===================================================================== */ - xabs = dabs(*x); - yabs = dabs(*y); - zabs = dabs(*z__); + xabs = abs(*x); + yabs = abs(*y); + zabs = abs(*z__); /* Computing MAX */ - r__1 = max(xabs,yabs); - w = dmax(r__1,zabs); - if (w == 0.f) { - ret_val = 0.f; + d__1 = max(xabs,yabs); + w = max(d__1,zabs); + if (w == 0.) 
{ + ret_val = 0.; } else { /* Computing 2nd power */ - r__1 = xabs / w; + d__1 = xabs / w; /* Computing 2nd power */ - r__2 = yabs / w; + d__2 = yabs / w; /* Computing 2nd power */ - r__3 = zabs / w; - ret_val = w * sqrt(r__1 * r__1 + r__2 * r__2 + r__3 * r__3); + d__3 = zabs / w; + ret_val = w * sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3); } return ret_val; -/* End of SLAPY3 */ +/* End of DLAPY3 */ -} /* slapy3_ */ +} /* dlapy3_ */ -/* Subroutine */ int slarf_(char *side, integer *m, integer *n, real *v, - integer *incv, real *tau, real *c__, integer *ldc, real *work) +/* Subroutine */ int dlarf_(char *side, integer *m, integer *n, doublereal *v, + integer *incv, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work) { /* System generated locals */ integer c_dim1, c_offset; - real r__1; + doublereal d__1; /* Local variables */ - extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, - integer *, real *, integer *, real *, integer *); + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); /* @@ -81707,7 +16975,7 @@ doublereal slapy3_(real *x, real *y, real *z__) Purpose ======= - SLARF applies a real elementary reflector H to a real m by n matrix + DLARF applies a real elementary reflector H to a real m by n matrix C, from either the left or the right. H is represented in the form H = I - tau * v * v' @@ -81729,7 +16997,7 @@ doublereal slapy3_(real *x, real *y, real *z__) N (input) INTEGER The number of columns of the matrix C. 
- V (input) REAL array, dimension + V (input) DOUBLE PRECISION array, dimension (1 + (M-1)*abs(INCV)) if SIDE = 'L' or (1 + (N-1)*abs(INCV)) if SIDE = 'R' The vector v in the representation of H. V is not used if @@ -81738,10 +17006,10 @@ doublereal slapy3_(real *x, real *y, real *z__) INCV (input) INTEGER The increment between elements of v. INCV <> 0. - TAU (input) REAL + TAU (input) DOUBLE PRECISION The value tau in the representation of H. - C (input/output) REAL array, dimension (LDC,N) + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) On entry, the m by n matrix C. On exit, C is overwritten by the matrix H * C if SIDE = 'L', or C * H if SIDE = 'R'. @@ -81749,7 +17017,7 @@ doublereal slapy3_(real *x, real *y, real *z__) LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace) REAL array, dimension + WORK (workspace) DOUBLE PRECISION array, dimension (N) if SIDE = 'L' or (M) if SIDE = 'R' @@ -81769,47 +17037,47 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Form H * C */ - if (*tau != 0.f) { + if (*tau != 0.) { /* w := C' * v */ - sgemv_("Transpose", m, n, &c_b1011, &c__[c_offset], ldc, &v[1], - incv, &c_b320, &work[1], &c__1); + dgemv_("Transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], + incv, &c_b29, &work[1], &c__1); /* C := C - v * w' */ - r__1 = -(*tau); - sger_(m, n, &r__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], + d__1 = -(*tau); + dger_(m, n, &d__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], ldc); } } else { /* Form C * H */ - if (*tau != 0.f) { + if (*tau != 0.) 
{ /* w := C * v */ - sgemv_("No transpose", m, n, &c_b1011, &c__[c_offset], ldc, &v[1], - incv, &c_b320, &work[1], &c__1); + dgemv_("No transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], + incv, &c_b29, &work[1], &c__1); /* C := C - w * v' */ - r__1 = -(*tau); - sger_(m, n, &r__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], + d__1 = -(*tau); + dger_(m, n, &d__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], ldc); } } return 0; -/* End of SLARF */ +/* End of DLARF */ -} /* slarf_ */ +} /* dlarf_ */ -/* Subroutine */ int slarfb_(char *side, char *trans, char *direct, char * - storev, integer *m, integer *n, integer *k, real *v, integer *ldv, - real *t, integer *ldt, real *c__, integer *ldc, real *work, integer * - ldwork) +/* Subroutine */ int dlarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, doublereal *v, integer * + ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, + doublereal *work, integer *ldwork) { /* System generated locals */ integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, @@ -81817,13 +17085,14 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Local variables */ static integer i__, j; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *), scopy_(integer *, real *, - integer *, real *, integer *), strmm_(char *, char *, char *, - char *, integer *, integer *, real *, real *, integer *, real *, - integer *); + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, 
integer *); static char transt[1]; @@ -81837,7 +17106,7 @@ doublereal slapy3_(real *x, real *y, real *z__) Purpose ======= - SLARFB applies a real block reflector H or its transpose H' to a + DLARFB applies a real block reflector H or its transpose H' to a real m by n matrix C, from either the left or the right. Arguments @@ -81873,7 +17142,7 @@ doublereal slapy3_(real *x, real *y, real *z__) The order of the matrix T (= the number of elementary reflectors whose product defines the block reflector). - V (input) REAL array, dimension + V (input) DOUBLE PRECISION array, dimension (LDV,K) if STOREV = 'C' (LDV,M) if STOREV = 'R' and SIDE = 'L' (LDV,N) if STOREV = 'R' and SIDE = 'R' @@ -81885,21 +17154,21 @@ doublereal slapy3_(real *x, real *y, real *z__) if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); if STOREV = 'R', LDV >= K. - T (input) REAL array, dimension (LDT,K) + T (input) DOUBLE PRECISION array, dimension (LDT,K) The triangular k by k matrix T in the representation of the block reflector. LDT (input) INTEGER The leading dimension of the array T. LDT >= K. - C (input/output) REAL array, dimension (LDC,N) + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) On entry, the m by n matrix C. On exit, C is overwritten by H*C or H'*C or C*H or C*H'. LDC (input) INTEGER The leading dimension of the array C. LDA >= max(1,M). - WORK (workspace) REAL array, dimension (LDWORK,K) + WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) LDWORK (input) INTEGER The leading dimension of the array WORK. 
@@ -81927,7 +17196,7 @@ doublereal slapy3_(real *x, real *y, real *z__) work -= work_offset; /* Function Body */ - if ((*m <= 0) || (*n <= 0)) { + if (*m <= 0 || *n <= 0) { return 0; } @@ -81960,30 +17229,29 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1); /* L10: */ } /* W := W * V1 */ - strmm_("Right", "Lower", "No transpose", "Unit", n, k, & - c_b1011, &v[v_offset], ldv, &work[work_offset], - ldwork); + dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b15, + &v[v_offset], ldv, &work[work_offset], ldwork); if (*m > *k) { /* W := W + C2'*V2 */ i__1 = *m - *k; - sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b1011, - &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], - ldv, &c_b1011, &work[work_offset], ldwork); + dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b15, & + c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], + ldv, &c_b15, &work[work_offset], ldwork); } /* W := W * T' or W * T */ - strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b1011, & - t[t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ @@ -81992,15 +17260,15 @@ doublereal slapy3_(real *x, real *y, real *z__) /* C2 := C2 - V2 * W' */ i__1 = *m - *k; - sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b1290, - &v[*k + 1 + v_dim1], ldv, &work[work_offset], - ldwork, &c_b1011, &c__[*k + 1 + c_dim1], ldc); + dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b151, + &v[*k + 1 + v_dim1], ldv, &work[work_offset], + ldwork, &c_b15, &c__[*k + 1 + c_dim1], ldc); } /* W := W * V1' */ - strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b1011, - &v[v_offset], ldv, &work[work_offset], ldwork); + dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b15, & + v[v_offset], ldv, 
&work[work_offset], ldwork); /* C1 := C1 - W' */ @@ -82026,31 +17294,30 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L40: */ } /* W := W * V1 */ - strmm_("Right", "Lower", "No transpose", "Unit", m, k, & - c_b1011, &v[v_offset], ldv, &work[work_offset], - ldwork); + dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b15, + &v[v_offset], ldv, &work[work_offset], ldwork); if (*n > *k) { /* W := W + C2 * V2 */ i__1 = *n - *k; - sgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b1011, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k - + 1 + v_dim1], ldv, &c_b1011, &work[work_offset], + dgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b15, &work[work_offset], ldwork); } /* W := W * T or W * T' */ - strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b1011, & - t[t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ @@ -82059,15 +17326,15 @@ doublereal slapy3_(real *x, real *y, real *z__) /* C2 := C2 - W * V2' */ i__1 = *n - *k; - sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b1290, - &work[work_offset], ldwork, &v[*k + 1 + v_dim1], - ldv, &c_b1011, &c__[(*k + 1) * c_dim1 + 1], ldc); + dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b151, + &work[work_offset], ldwork, &v[*k + 1 + v_dim1], + ldv, &c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc); } /* W := W * V1' */ - strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b1011, - &v[v_offset], ldv, &work[work_offset], ldwork); + dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b15, & + v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W */ @@ -82103,30 +17370,30 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; 
for (j = 1; j <= i__1; ++j) { - scopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1); /* L70: */ } /* W := W * V2 */ - strmm_("Right", "Upper", "No transpose", "Unit", n, k, & - c_b1011, &v[*m - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); + dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b15, + &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); if (*m > *k) { /* W := W + C1'*V1 */ i__1 = *m - *k; - sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b1011, - &c__[c_offset], ldc, &v[v_offset], ldv, &c_b1011, - &work[work_offset], ldwork); + dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b15, & + c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & + work[work_offset], ldwork); } /* W := W * T' or W * T */ - strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b1011, & - t[t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ @@ -82135,15 +17402,16 @@ doublereal slapy3_(real *x, real *y, real *z__) /* C1 := C1 - V1 * W' */ i__1 = *m - *k; - sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b1290, - &v[v_offset], ldv, &work[work_offset], ldwork, & - c_b1011, &c__[c_offset], ldc); + dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b151, + &v[v_offset], ldv, &work[work_offset], ldwork, & + c_b15, &c__[c_offset], ldc) + ; } /* W := W * V2' */ - strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b1011, - &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b15, & + v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], ldwork); /* C2 := C2 - W' */ @@ -82171,30 +17439,30 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ j * 
work_dim1 + 1], &c__1); /* L100: */ } /* W := W * V2 */ - strmm_("Right", "Upper", "No transpose", "Unit", m, k, & - c_b1011, &v[*n - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); + dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b15, + &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); if (*n > *k) { /* W := W + C1 * V1 */ i__1 = *n - *k; - sgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b1011, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b1011, &work[work_offset], ldwork); + dgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b15, &work[work_offset], ldwork); } /* W := W * T or W * T' */ - strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b1011, & - t[t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ @@ -82203,15 +17471,16 @@ doublereal slapy3_(real *x, real *y, real *z__) /* C1 := C1 - W * V1' */ i__1 = *n - *k; - sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b1290, - &work[work_offset], ldwork, &v[v_offset], ldv, & - c_b1011, &c__[c_offset], ldc); + dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b151, + &work[work_offset], ldwork, &v[v_offset], ldv, & + c_b15, &c__[c_offset], ldc) + ; } /* W := W * V2' */ - strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b1011, - &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b15, & + v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], ldwork); /* C2 := C2 - W */ @@ -82251,29 +17520,29 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1); /* L130: */ } /* W := W * V1' */ - strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b1011, - &v[v_offset], ldv, 
&work[work_offset], ldwork); + dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b15, & + v[v_offset], ldv, &work[work_offset], ldwork); if (*m > *k) { /* W := W + C2'*V2' */ i__1 = *m - *k; - sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b1011, & + dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b15, & c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + - 1], ldv, &c_b1011, &work[work_offset], ldwork); + 1], ldv, &c_b15, &work[work_offset], ldwork); } /* W := W * T' or W * T */ - strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b1011, & - t[t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); /* C := C - V' * W' */ @@ -82282,16 +17551,15 @@ doublereal slapy3_(real *x, real *y, real *z__) /* C2 := C2 - V2' * W' */ i__1 = *m - *k; - sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b1290, & - v[(*k + 1) * v_dim1 + 1], ldv, &work[work_offset], - ldwork, &c_b1011, &c__[*k + 1 + c_dim1], ldc); + dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b151, &v[ + (*k + 1) * v_dim1 + 1], ldv, &work[work_offset], + ldwork, &c_b15, &c__[*k + 1 + c_dim1], ldc); } /* W := W * V1 */ - strmm_("Right", "Upper", "No transpose", "Unit", n, k, & - c_b1011, &v[v_offset], ldv, &work[work_offset], - ldwork); + dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b15, + &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W' */ @@ -82317,30 +17585,30 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L160: */ } /* W := W * V1' */ - strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b1011, - &v[v_offset], ldv, &work[work_offset], ldwork); + dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b15, & + v[v_offset], ldv, &work[work_offset], ldwork); if (*n > *k) { /* W := W + C2 * V2' */ i__1 = 
*n - *k; - sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b1011, - &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) * - v_dim1 + 1], ldv, &c_b1011, &work[work_offset], + dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b15, & + c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b15, &work[work_offset], ldwork); } /* W := W * T or W * T' */ - strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b1011, & - t[t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ @@ -82349,17 +17617,16 @@ doublereal slapy3_(real *x, real *y, real *z__) /* C2 := C2 - W * V2 */ i__1 = *n - *k; - sgemm_("No transpose", "No transpose", m, &i__1, k, & - c_b1290, &work[work_offset], ldwork, &v[(*k + 1) * - v_dim1 + 1], ldv, &c_b1011, &c__[(*k + 1) * - c_dim1 + 1], ldc); + dgemm_("No transpose", "No transpose", m, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b15, &c__[(*k + 1) * c_dim1 + + 1], ldc); } /* W := W * V1 */ - strmm_("Right", "Upper", "No transpose", "Unit", m, k, & - c_b1011, &v[v_offset], ldv, &work[work_offset], - ldwork); + dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b15, + &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W */ @@ -82395,30 +17662,30 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1); /* L190: */ } /* W := W * V2' */ - strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b1011, - &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); + dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b15, & + v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] + , ldwork); if (*m > *k) { /* W := W + C1'*V1' */ i__1 = *m - *k; - sgemm_("Transpose", "Transpose", n, k, 
&i__1, &c_b1011, & - c__[c_offset], ldc, &v[v_offset], ldv, &c_b1011, & + dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b15, & + c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & work[work_offset], ldwork); } /* W := W * T' or W * T */ - strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b1011, & - t[t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); /* C := C - V' * W' */ @@ -82427,15 +17694,15 @@ doublereal slapy3_(real *x, real *y, real *z__) /* C1 := C1 - V1' * W' */ i__1 = *m - *k; - sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b1290, & - v[v_offset], ldv, &work[work_offset], ldwork, & - c_b1011, &c__[c_offset], ldc); + dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b151, &v[ + v_offset], ldv, &work[work_offset], ldwork, & + c_b15, &c__[c_offset], ldc); } /* W := W * V2 */ - strmm_("Right", "Lower", "No transpose", "Unit", n, k, & - c_b1011, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b15, + &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C2 := C2 - W' */ @@ -82463,30 +17730,30 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ j * work_dim1 + 1], &c__1); /* L220: */ } /* W := W * V2' */ - strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b1011, - &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); + dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b15, & + v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] + , ldwork); if (*n > *k) { /* W := W + C1 * V1' */ i__1 = *n - *k; - sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b1011, - &c__[c_offset], ldc, &v[v_offset], ldv, &c_b1011, - &work[work_offset], ldwork); + dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b15, & 
+ c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & + work[work_offset], ldwork); } /* W := W * T or W * T' */ - strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b1011, & - t[t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b15, &t[ + t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ @@ -82495,15 +17762,15 @@ doublereal slapy3_(real *x, real *y, real *z__) /* C1 := C1 - W * V1 */ i__1 = *n - *k; - sgemm_("No transpose", "No transpose", m, &i__1, k, & - c_b1290, &work[work_offset], ldwork, &v[v_offset], - ldv, &c_b1011, &c__[c_offset], ldc); + dgemm_("No transpose", "No transpose", m, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b15, &c__[c_offset], ldc); } /* W := W * V2 */ - strmm_("Right", "Lower", "No transpose", "Unit", m, k, & - c_b1011, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b15, + &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C1 := C1 - W */ @@ -82526,28 +17793,29 @@ doublereal slapy3_(real *x, real *y, real *z__) return 0; -/* End of SLARFB */ +/* End of DLARFB */ -} /* slarfb_ */ +} /* dlarfb_ */ -/* Subroutine */ int slarfg_(integer *n, real *alpha, real *x, integer *incx, - real *tau) +/* Subroutine */ int dlarfg_(integer *n, doublereal *alpha, doublereal *x, + integer *incx, doublereal *tau) { /* System generated locals */ integer i__1; - real r__1; + doublereal d__1; /* Builtin functions */ - double r_sign(real *, real *); + double d_sign(doublereal *, doublereal *); /* Local variables */ static integer j, knt; - static real beta; - extern doublereal snrm2_(integer *, real *, integer *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static real xnorm; - extern doublereal slapy2_(real *, real *), slamch_(char *); - static real safmin, rsafmn; + static doublereal beta; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + 
extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal xnorm; + + static doublereal safmin, rsafmn; /* @@ -82560,7 +17828,7 @@ doublereal slapy3_(real *x, real *y, real *z__) Purpose ======= - SLARFG generates a real elementary reflector H of order n, such + DLARFG generates a real elementary reflector H of order n, such that H * ( alpha ) = ( beta ), H' * H = I. @@ -82586,11 +17854,11 @@ doublereal slapy3_(real *x, real *y, real *z__) N (input) INTEGER The order of the elementary reflector. - ALPHA (input/output) REAL + ALPHA (input/output) DOUBLE PRECISION On entry, the value alpha. On exit, it is overwritten with the value beta. - X (input/output) REAL array, dimension + X (input/output) DOUBLE PRECISION array, dimension (1+(N-2)*abs(INCX)) On entry, the vector x. On exit, it is overwritten with the vector v. @@ -82598,7 +17866,7 @@ doublereal slapy3_(real *x, real *y, real *z__) INCX (input) INTEGER The increment between elements of X. INCX > 0. - TAU (output) REAL + TAU (output) DOUBLE PRECISION The value tau. ===================================================================== @@ -82610,51 +17878,51 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Function Body */ if (*n <= 1) { - *tau = 0.f; + *tau = 0.; return 0; } i__1 = *n - 1; - xnorm = snrm2_(&i__1, &x[1], incx); + xnorm = dnrm2_(&i__1, &x[1], incx); - if (xnorm == 0.f) { + if (xnorm == 0.) { /* H = I */ - *tau = 0.f; + *tau = 0.; } else { /* general case */ - r__1 = slapy2_(alpha, &xnorm); - beta = -r_sign(&r__1, alpha); - safmin = slamch_("S") / slamch_("E"); - if (dabs(beta) < safmin) { + d__1 = dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); + safmin = SAFEMINIMUM / EPSILON; + if (abs(beta) < safmin) { /* XNORM, BETA may be inaccurate; scale X and recompute them */ - rsafmn = 1.f / safmin; + rsafmn = 1. 
/ safmin; knt = 0; L10: ++knt; i__1 = *n - 1; - sscal_(&i__1, &rsafmn, &x[1], incx); + dscal_(&i__1, &rsafmn, &x[1], incx); beta *= rsafmn; *alpha *= rsafmn; - if (dabs(beta) < safmin) { + if (abs(beta) < safmin) { goto L10; } /* New BETA is at most 1, at least SAFMIN */ i__1 = *n - 1; - xnorm = snrm2_(&i__1, &x[1], incx); - r__1 = slapy2_(alpha, &xnorm); - beta = -r_sign(&r__1, alpha); + xnorm = dnrm2_(&i__1, &x[1], incx); + d__1 = dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); *tau = (beta - *alpha) / beta; i__1 = *n - 1; - r__1 = 1.f / (*alpha - beta); - sscal_(&i__1, &r__1, &x[1], incx); + d__1 = 1. / (*alpha - beta); + dscal_(&i__1, &d__1, &x[1], incx); /* If ALPHA is subnormal, it may lose relative accuracy */ @@ -82667,32 +17935,35 @@ L10: } else { *tau = (beta - *alpha) / beta; i__1 = *n - 1; - r__1 = 1.f / (*alpha - beta); - sscal_(&i__1, &r__1, &x[1], incx); + d__1 = 1. / (*alpha - beta); + dscal_(&i__1, &d__1, &x[1], incx); *alpha = beta; } } return 0; -/* End of SLARFG */ +/* End of DLARFG */ -} /* slarfg_ */ +} /* dlarfg_ */ -/* Subroutine */ int slarft_(char *direct, char *storev, integer *n, integer * - k, real *v, integer *ldv, real *tau, real *t, integer *ldt) +/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer * + k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, + integer *ldt) { /* System generated locals */ integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; - real r__1; + doublereal d__1; /* Local variables */ static integer i__, j; - static real vii; + static doublereal vii; extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *), strmv_(char *, char *, char *, integer *, real *, - integer *, real *, integer *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, 
doublereal *, integer *), dtrmv_(char *, + char *, char *, integer *, doublereal *, integer *, doublereal *, + integer *); /* @@ -82705,7 +17976,7 @@ L10: Purpose ======= - SLARFT forms the triangular factor T of a real block reflector H + DLARFT forms the triangular factor T of a real block reflector H of order n, which is defined as a product of k elementary reflectors. If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; @@ -82744,7 +18015,7 @@ L10: The order of the triangular factor T (= the number of elementary reflectors). K >= 1. - V (input/output) REAL array, dimension + V (input/output) DOUBLE PRECISION array, dimension (LDV,K) if STOREV = 'C' (LDV,N) if STOREV = 'R' The matrix V. See further details. @@ -82753,11 +18024,11 @@ L10: The leading dimension of the array V. If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. - TAU (input) REAL array, dimension (K) + TAU (input) DOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i). - T (output) REAL array, dimension (LDT,K) + T (output) DOUBLE PRECISION array, dimension (LDT,K) The k by k triangular factor T of the block reflector. If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is lower triangular. The rest of the array is not used. @@ -82813,13 +18084,13 @@ L10: if (lsame_(direct, "F")) { i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { - if (tau[i__] == 0.f) { + if (tau[i__] == 0.) 
{ /* H(i) = I */ i__2 = i__; for (j = 1; j <= i__2; ++j) { - t[j + i__ * t_dim1] = 0.f; + t[j + i__ * t_dim1] = 0.; /* L10: */ } } else { @@ -82827,16 +18098,16 @@ L10: /* general case */ vii = v[i__ + i__ * v_dim1]; - v[i__ + i__ * v_dim1] = 1.f; + v[i__ + i__ * v_dim1] = 1.; if (lsame_(storev, "C")) { /* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ i__2 = *n - i__ + 1; i__3 = i__ - 1; - r__1 = -tau[i__]; - sgemv_("Transpose", &i__2, &i__3, &r__1, &v[i__ + v_dim1], - ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b320, &t[ + d__1 = -tau[i__]; + dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1], + ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b29, &t[ i__ * t_dim1 + 1], &c__1); } else { @@ -82844,17 +18115,17 @@ L10: i__2 = i__ - 1; i__3 = *n - i__ + 1; - r__1 = -tau[i__]; - sgemv_("No transpose", &i__2, &i__3, &r__1, &v[i__ * + d__1 = -tau[i__]; + dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ * v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & - c_b320, &t[i__ * t_dim1 + 1], &c__1); + c_b29, &t[i__ * t_dim1 + 1], &c__1); } v[i__ + i__ * v_dim1] = vii; /* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ i__2 = i__ - 1; - strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); t[i__ + i__ * t_dim1] = tau[i__]; } @@ -82862,13 +18133,13 @@ L10: } } else { for (i__ = *k; i__ >= 1; --i__) { - if (tau[i__] == 0.f) { + if (tau[i__] == 0.) 
{ /* H(i) = I */ i__1 = *k; for (j = i__; j <= i__1; ++j) { - t[j + i__ * t_dim1] = 0.f; + t[j + i__ * t_dim1] = 0.; /* L30: */ } } else { @@ -82878,7 +18149,7 @@ L10: if (i__ < *k) { if (lsame_(storev, "C")) { vii = v[*n - *k + i__ + i__ * v_dim1]; - v[*n - *k + i__ + i__ * v_dim1] = 1.f; + v[*n - *k + i__ + i__ * v_dim1] = 1.; /* T(i+1:k,i) := @@ -82887,15 +18158,15 @@ L10: i__1 = *n - *k + i__; i__2 = *k - i__; - r__1 = -tau[i__]; - sgemv_("Transpose", &i__1, &i__2, &r__1, &v[(i__ + 1) + d__1 = -tau[i__]; + dgemv_("Transpose", &i__1, &i__2, &d__1, &v[(i__ + 1) * v_dim1 + 1], ldv, &v[i__ * v_dim1 + 1], & - c__1, &c_b320, &t[i__ + 1 + i__ * t_dim1], & + c__1, &c_b29, &t[i__ + 1 + i__ * t_dim1], & c__1); v[*n - *k + i__ + i__ * v_dim1] = vii; } else { vii = v[i__ + (*n - *k + i__) * v_dim1]; - v[i__ + (*n - *k + i__) * v_dim1] = 1.f; + v[i__ + (*n - *k + i__) * v_dim1] = 1.; /* T(i+1:k,i) := @@ -82904,17 +18175,17 @@ L10: i__1 = *k - i__; i__2 = *n - *k + i__; - r__1 = -tau[i__]; - sgemv_("No transpose", &i__1, &i__2, &r__1, &v[i__ + + d__1 = -tau[i__]; + dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & - c_b320, &t[i__ + 1 + i__ * t_dim1], &c__1); + c_b29, &t[i__ + 1 + i__ * t_dim1], &c__1); v[i__ + (*n - *k + i__) * v_dim1] = vii; } /* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ i__1 = *k - i__; - strmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * t_dim1], &c__1) ; @@ -82926,26 +18197,28 @@ L10: } return 0; -/* End of SLARFT */ +/* End of DLARFT */ -} /* slarft_ */ +} /* dlarft_ */ -/* Subroutine */ int slarfx_(char *side, integer *m, integer *n, real *v, - real *tau, real *c__, integer *ldc, real *work) +/* Subroutine */ int dlarfx_(char *side, integer *m, integer *n, doublereal * + v, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work) { /* System generated locals */ integer c_dim1, 
c_offset, i__1; - real r__1; + doublereal d__1; /* Local variables */ static integer j; - static real t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, - v7, v8, v9, t10, v10, sum; - extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, - integer *, real *, integer *, real *, integer *); + static doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, + v6, v7, v8, v9, t10, v10, sum; + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); /* @@ -82958,7 +18231,7 @@ L10: Purpose ======= - SLARFX applies a real elementary reflector H to a real m by n + DLARFX applies a real elementary reflector H to a real m by n matrix C, from either the left or the right. H is represented in the form @@ -82983,14 +18256,14 @@ L10: N (input) INTEGER The number of columns of the matrix C. - V (input) REAL array, dimension (M) if SIDE = 'L' + V (input) DOUBLE PRECISION array, dimension (M) if SIDE = 'L' or (N) if SIDE = 'R' The vector v in the representation of H. - TAU (input) REAL + TAU (input) DOUBLE PRECISION The value tau in the representation of H. - C (input/output) REAL array, dimension (LDC,N) + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) On entry, the m by n matrix C. On exit, C is overwritten by the matrix H * C if SIDE = 'L', or C * H if SIDE = 'R'. @@ -82998,7 +18271,7 @@ L10: LDC (input) INTEGER The leading dimension of the array C. LDA >= (1,M). 
- WORK (workspace) REAL array, dimension + WORK (workspace) DOUBLE PRECISION array, dimension (N) if SIDE = 'L' or (M) if SIDE = 'R' WORK is not referenced if H has order < 11. @@ -83015,7 +18288,7 @@ L10: --work; /* Function Body */ - if (*tau == 0.f) { + if (*tau == 0.) { return 0; } if (lsame_(side, "L")) { @@ -83041,20 +18314,20 @@ L10: w := C'*v */ - sgemv_("Transpose", m, n, &c_b1011, &c__[c_offset], ldc, &v[1], &c__1, - &c_b320, &work[1], &c__1); + dgemv_("Transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], &c__1, & + c_b29, &work[1], &c__1); /* C := C - tau * v * w' */ - r__1 = -(*tau); - sger_(m, n, &r__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc) + d__1 = -(*tau); + dger_(m, n, &d__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc) ; goto L410; L10: /* Special code for 1 x 1 Householder */ - t1 = 1.f - *tau * v[1] * v[1]; + t1 = 1. - *tau * v[1] * v[1]; i__1 = *n; for (j = 1; j <= i__1; ++j) { c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; @@ -83356,20 +18629,20 @@ L190: w := C * v */ - sgemv_("No transpose", m, n, &c_b1011, &c__[c_offset], ldc, &v[1], & - c__1, &c_b320, &work[1], &c__1); + dgemv_("No transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], & + c__1, &c_b29, &work[1], &c__1); /* C := C - tau * w * v' */ - r__1 = -(*tau); - sger_(m, n, &r__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc) + d__1 = -(*tau); + dger_(m, n, &d__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc) ; goto L410; L210: /* Special code for 1 x 1 Householder */ - t1 = 1.f - *tau * v[1] * v[1]; + t1 = 1. 
- *tau * v[1] * v[1]; i__1 = *m; for (j = 1; j <= i__1; ++j) { c__[j + c_dim1] = t1 * c__[j + c_dim1]; @@ -83386,9 +18659,9 @@ L230: t2 = *tau * v2; i__1 = *m; for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))]; + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)]; c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; + c__[j + (c_dim1 << 1)] -= sum * t2; /* L240: */ } goto L410; @@ -83404,10 +18677,10 @@ L250: t3 = *tau * v3; i__1 = *m; for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3]; + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3]; c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; + c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; /* L260: */ } @@ -83426,12 +18699,12 @@ L270: t4 = *tau * v4; i__1 = *m; for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))]; + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)]; c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; + c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; - c__[j + ((c_dim1) << (2))] -= sum * t4; + c__[j + (c_dim1 << 2)] -= sum * t4; /* L280: */ } goto L410; @@ -83451,13 +18724,13 @@ L290: t5 = *tau * v5; i__1 = *m; for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] + - v5 * c__[j + c_dim1 * 5]; + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5]; c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; + c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; - c__[j 
+ ((c_dim1) << (2))] -= sum * t4; + c__[j + (c_dim1 << 2)] -= sum * t4; c__[j + c_dim1 * 5] -= sum * t5; /* L300: */ } @@ -83480,13 +18753,13 @@ L310: t6 = *tau * v6; i__1 = *m; for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] + - v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; + c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; - c__[j + ((c_dim1) << (2))] -= sum * t4; + c__[j + (c_dim1 << 2)] -= sum * t4; c__[j + c_dim1 * 5] -= sum * t5; c__[j + c_dim1 * 6] -= sum * t6; /* L320: */ @@ -83512,14 +18785,14 @@ L330: t7 = *tau * v7; i__1 = *m; for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] + - v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * - c__[j + c_dim1 * 7]; + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7]; c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; + c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; - c__[j + ((c_dim1) << (2))] -= sum * t4; + c__[j + (c_dim1 << 2)] -= sum * t4; c__[j + c_dim1 * 5] -= sum * t5; c__[j + c_dim1 * 6] -= sum * t6; c__[j + c_dim1 * 7] -= sum * t7; @@ -83548,18 +18821,18 @@ L350: t8 = *tau * v8; i__1 = *m; for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] + - v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * - c__[j + c_dim1 * 7] + v8 * 
c__[j + ((c_dim1) << (3))]; + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)]; c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; + c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; - c__[j + ((c_dim1) << (2))] -= sum * t4; + c__[j + (c_dim1 << 2)] -= sum * t4; c__[j + c_dim1 * 5] -= sum * t5; c__[j + c_dim1 * 6] -= sum * t6; c__[j + c_dim1 * 7] -= sum * t7; - c__[j + ((c_dim1) << (3))] -= sum * t8; + c__[j + (c_dim1 << 3)] -= sum * t8; /* L360: */ } goto L410; @@ -83587,19 +18860,19 @@ L370: t9 = *tau * v9; i__1 = *m; for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] + - v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * - c__[j + c_dim1 * 7] + v8 * c__[j + ((c_dim1) << (3))] + - v9 * c__[j + c_dim1 * 9]; + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9]; c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; + c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; - c__[j + ((c_dim1) << (2))] -= sum * t4; + c__[j + (c_dim1 << 2)] -= sum * t4; c__[j + c_dim1 * 5] -= sum * t5; c__[j + c_dim1 * 6] -= sum * t6; c__[j + c_dim1 * 7] -= sum * t7; - c__[j + ((c_dim1) << (3))] -= sum * t8; + c__[j + (c_dim1 << 3)] -= sum * t8; c__[j + c_dim1 * 9] -= sum * t9; /* L380: */ } @@ -83630,19 +18903,19 @@ L390: t10 = *tau * v10; i__1 = *m; for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3 - * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] + - v5 * c__[j + 
c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * - c__[j + c_dim1 * 7] + v8 * c__[j + ((c_dim1) << (3))] + - v9 * c__[j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10]; + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10]; c__[j + c_dim1] -= sum * t1; - c__[j + ((c_dim1) << (1))] -= sum * t2; + c__[j + (c_dim1 << 1)] -= sum * t2; c__[j + c_dim1 * 3] -= sum * t3; - c__[j + ((c_dim1) << (2))] -= sum * t4; + c__[j + (c_dim1 << 2)] -= sum * t4; c__[j + c_dim1 * 5] -= sum * t5; c__[j + c_dim1 * 6] -= sum * t6; c__[j + c_dim1 * 7] -= sum * t7; - c__[j + ((c_dim1) << (3))] -= sum * t8; + c__[j + (c_dim1 << 3)] -= sum * t8; c__[j + c_dim1 * 9] -= sum * t9; c__[j + c_dim1 * 10] -= sum * t10; /* L400: */ @@ -83652,11 +18925,12 @@ L390: L410: return 0; -/* End of SLARFX */ +/* End of DLARFX */ -} /* slarfx_ */ +} /* dlarfx_ */ -/* Subroutine */ int slartg_(real *f, real *g, real *cs, real *sn, real *r__) +/* Subroutine */ int dlartg_(doublereal *f, doublereal *g, doublereal *cs, + doublereal *sn, doublereal *r__) { /* Initialized data */ @@ -83664,18 +18938,18 @@ L410: /* System generated locals */ integer i__1; - real r__1, r__2; + doublereal d__1, d__2; /* Builtin functions */ - double log(doublereal), pow_ri(real *, integer *), sqrt(doublereal); + double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal); /* Local variables */ static integer i__; - static real f1, g1, eps, scale; + static doublereal f1, g1, eps, scale; static integer count; - static real safmn2, safmx2; - extern doublereal slamch_(char *); - static real safmin; + static doublereal safmn2, safmx2; + + static doublereal safmin; /* @@ -83688,17 +18962,17 @@ L410: Purpose ======= - SLARTG generate a plane rotation so that + DLARTG generate a plane rotation so that [ 
CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. [ -SN CS ] [ G ] [ 0 ] - This is a slower, more accurate version of the BLAS1 routine SROTG, + This is a slower, more accurate version of the BLAS1 routine DROTG, with the following other differences: F and G are unchanged on return. If G=0, then CS=1 and SN=0. If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any - floating point operations (saves work in SBDSQR when + floating point operations (saves work in DBDSQR when there are zeros on the diagonal). If F exceeds G in magnitude, CS will be positive. @@ -83706,19 +18980,19 @@ L410: Arguments ========= - F (input) REAL + F (input) DOUBLE PRECISION The first component of vector to be rotated. - G (input) REAL + G (input) DOUBLE PRECISION The second component of vector to be rotated. - CS (output) REAL + CS (output) DOUBLE PRECISION The cosine of the rotation. - SN (output) REAL + SN (output) DOUBLE PRECISION The sine of the rotation. - R (output) REAL + R (output) DOUBLE PRECISION The nonzero component of the rotated vector. ===================================================================== @@ -83727,28 +19001,28 @@ L410: if (first) { first = FALSE_; - safmin = slamch_("S"); - eps = slamch_("E"); - r__1 = slamch_("B"); - i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / - 2.f); - safmn2 = pow_ri(&r__1, &i__1); - safmx2 = 1.f / safmn2; - } - if (*g == 0.f) { - *cs = 1.f; - *sn = 0.f; + safmin = SAFEMINIMUM; + eps = EPSILON; + d__1 = BASE; + i__1 = (integer) (log(safmin / eps) / log(BASE) / + 2.); + safmn2 = pow_di(&d__1, &i__1); + safmx2 = 1. / safmn2; + } + if (*g == 0.) { + *cs = 1.; + *sn = 0.; *r__ = *f; - } else if (*f == 0.f) { - *cs = 0.f; - *sn = 1.f; + } else if (*f == 0.) 
{ + *cs = 0.; + *sn = 1.; *r__ = *g; } else { f1 = *f; g1 = *g; /* Computing MAX */ - r__1 = dabs(f1), r__2 = dabs(g1); - scale = dmax(r__1,r__2); + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); if (scale >= safmx2) { count = 0; L10: @@ -83756,16 +19030,16 @@ L10: f1 *= safmn2; g1 *= safmn2; /* Computing MAX */ - r__1 = dabs(f1), r__2 = dabs(g1); - scale = dmax(r__1,r__2); + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); if (scale >= safmx2) { goto L10; } /* Computing 2nd power */ - r__1 = f1; + d__1 = f1; /* Computing 2nd power */ - r__2 = g1; - *r__ = sqrt(r__1 * r__1 + r__2 * r__2); + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); *cs = f1 / *r__; *sn = g1 / *r__; i__1 = count; @@ -83780,16 +19054,16 @@ L30: f1 *= safmx2; g1 *= safmx2; /* Computing MAX */ - r__1 = dabs(f1), r__2 = dabs(g1); - scale = dmax(r__1,r__2); + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); if (scale <= safmn2) { goto L30; } /* Computing 2nd power */ - r__1 = f1; + d__1 = f1; /* Computing 2nd power */ - r__2 = g1; - *r__ = sqrt(r__1 * r__1 + r__2 * r__2); + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); *cs = f1 / *r__; *sn = g1 / *r__; i__1 = count; @@ -83799,14 +19073,14 @@ L30: } } else { /* Computing 2nd power */ - r__1 = f1; + d__1 = f1; /* Computing 2nd power */ - r__2 = g1; - *r__ = sqrt(r__1 * r__1 + r__2 * r__2); + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); *cs = f1 / *r__; *sn = g1 / *r__; } - if (dabs(*f) > dabs(*g) && *cs < 0.f) { + if (abs(*f) > abs(*g) && *cs < 0.) 
{ *cs = -(*cs); *sn = -(*sn); *r__ = -(*r__); @@ -83814,21 +19088,21 @@ L30: } return 0; -/* End of SLARTG */ +/* End of DLARTG */ -} /* slartg_ */ +} /* dlartg_ */ -/* Subroutine */ int slas2_(real *f, real *g, real *h__, real *ssmin, real * - ssmax) +/* Subroutine */ int dlas2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax) { /* System generated locals */ - real r__1, r__2; + doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static real c__, fa, ga, ha, as, at, au, fhmn, fhmx; + static doublereal c__, fa, ga, ha, as, at, au, fhmn, fhmx; /* @@ -83841,7 +19115,7 @@ L30: Purpose ======= - SLAS2 computes the singular values of the 2-by-2 matrix + DLAS2 computes the singular values of the 2-by-2 matrix [ F G ] [ 0 H ]. On return, SSMIN is the smaller singular value and SSMAX is the @@ -83850,19 +19124,19 @@ L30: Arguments ========= - F (input) REAL + F (input) DOUBLE PRECISION The (1,1) element of the 2-by-2 matrix. - G (input) REAL + G (input) DOUBLE PRECISION The (1,2) element of the 2-by-2 matrix. - H (input) REAL + H (input) DOUBLE PRECISION The (2,2) element of the 2-by-2 matrix. - SSMIN (output) REAL + SSMIN (output) DOUBLE PRECISION The smaller singular value. - SSMAX (output) REAL + SSMAX (output) DOUBLE PRECISION The larger singular value. Further Details @@ -83888,33 +19162,33 @@ L30: */ - fa = dabs(*f); - ga = dabs(*g); - ha = dabs(*h__); - fhmn = dmin(fa,ha); - fhmx = dmax(fa,ha); - if (fhmn == 0.f) { - *ssmin = 0.f; - if (fhmx == 0.f) { + fa = abs(*f); + ga = abs(*g); + ha = abs(*h__); + fhmn = min(fa,ha); + fhmx = max(fa,ha); + if (fhmn == 0.) { + *ssmin = 0.; + if (fhmx == 0.) 
{ *ssmax = ga; } else { /* Computing 2nd power */ - r__1 = dmin(fhmx,ga) / dmax(fhmx,ga); - *ssmax = dmax(fhmx,ga) * sqrt(r__1 * r__1 + 1.f); + d__1 = min(fhmx,ga) / max(fhmx,ga); + *ssmax = max(fhmx,ga) * sqrt(d__1 * d__1 + 1.); } } else { if (ga < fhmx) { - as = fhmn / fhmx + 1.f; + as = fhmn / fhmx + 1.; at = (fhmx - fhmn) / fhmx; /* Computing 2nd power */ - r__1 = ga / fhmx; - au = r__1 * r__1; - c__ = 2.f / (sqrt(as * as + au) + sqrt(at * at + au)); + d__1 = ga / fhmx; + au = d__1 * d__1; + c__ = 2. / (sqrt(as * as + au) + sqrt(at * at + au)); *ssmin = fhmn * c__; *ssmax = fhmx / c__; } else { au = fhmx / ga; - if (au == 0.f) { + if (au == 0.) { /* Avoid possible harmful underflow if exponent range @@ -83925,14 +19199,13 @@ L30: *ssmin = fhmn * fhmx / ga; *ssmax = ga; } else { - as = fhmn / fhmx + 1.f; + as = fhmn / fhmx + 1.; at = (fhmx - fhmn) / fhmx; /* Computing 2nd power */ - r__1 = as * au; + d__1 = as * au; /* Computing 2nd power */ - r__2 = at * au; - c__ = 1.f / (sqrt(r__1 * r__1 + 1.f) + sqrt(r__2 * r__2 + 1.f) - ); + d__2 = at * au; + c__ = 1. / (sqrt(d__1 * d__1 + 1.) 
+ sqrt(d__2 * d__2 + 1.)); *ssmin = fhmn * c__ * au; *ssmin += *ssmin; *ssmax = ga / (c__ + c__); @@ -83941,29 +19214,29 @@ L30: } return 0; -/* End of SLAS2 */ +/* End of DLAS2 */ -} /* slas2_ */ +} /* dlas2_ */ -/* Subroutine */ int slascl_(char *type__, integer *kl, integer *ku, real * - cfrom, real *cto, integer *m, integer *n, real *a, integer *lda, - integer *info) +/* Subroutine */ int dlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublereal *a, integer *lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ static integer i__, j, k1, k2, k3, k4; - static real mul, cto1; + static doublereal mul, cto1; static logical done; - static real ctoc; + static doublereal ctoc; extern logical lsame_(char *, char *); static integer itype; - static real cfrom1; - extern doublereal slamch_(char *); - static real cfromc; + static doublereal cfrom1; + + static doublereal cfromc; extern /* Subroutine */ int xerbla_(char *, integer *); - static real bignum, smlnum; + static doublereal bignum, smlnum; /* @@ -83976,7 +19249,7 @@ L30: Purpose ======= - SLASCL multiplies the M by N real matrix A by the real scalar + DLASCL multiplies the M by N real matrix A by the real scalar CTO/CFROM. This is done without over/underflow as long as the final result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that A may be full, upper triangular, lower triangular, upper Hessenberg, @@ -84008,8 +19281,8 @@ L30: The upper bandwidth of A. Referenced only if TYPE = 'B', 'Q' or 'Z'. - CFROM (input) REAL - CTO (input) REAL + CFROM (input) DOUBLE PRECISION + CTO (input) DOUBLE PRECISION The matrix A is multiplied by CTO/CFROM. A(I,J) is computed without over/underflow if the final result CTO*A(I,J)/CFROM can be represented without over/underflow. CFROM must be @@ -84021,7 +19294,7 @@ L30: N (input) INTEGER The number of columns of the matrix A. 
N >= 0. - A (input/output) REAL array, dimension (LDA,M) + A (input/output) DOUBLE PRECISION array, dimension (LDA,M) The matrix to be multiplied by CTO/CFROM. See TYPE for the storage type. @@ -84066,29 +19339,27 @@ L30: if (itype == -1) { *info = -1; - } else if (*cfrom == 0.f) { + } else if (*cfrom == 0.) { *info = -4; } else if (*m < 0) { *info = -6; - } else if (((*n < 0) || (itype == 4 && *n != *m)) || (itype == 5 && *n != - *m)) { + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { *info = -7; } else if (itype <= 3 && *lda < max(1,*m)) { *info = -9; } else if (itype >= 4) { /* Computing MAX */ i__1 = *m - 1; - if ((*kl < 0) || (*kl > max(i__1,0))) { + if (*kl < 0 || *kl > max(i__1,0)) { *info = -2; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = *n - 1; - if (((*ku < 0) || (*ku > max(i__1,0))) || (((itype == 4) || ( - itype == 5)) && *kl != *ku)) { + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { *info = -3; - } else if (((itype == 4 && *lda < *kl + 1) || (itype == 5 && *lda - < *ku + 1)) || (itype == 6 && *lda < ((*kl) << (1)) + *ku - + 1)) { + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { *info = -9; } } @@ -84096,20 +19367,20 @@ L30: if (*info != 0) { i__1 = -(*info); - xerbla_("SLASCL", &i__1); + xerbla_("DLASCL", &i__1); return 0; } /* Quick return if possible */ - if ((*n == 0) || (*m == 0)) { + if (*n == 0 || *m == 0) { return 0; } /* Get machine parameters */ - smlnum = slamch_("S"); - bignum = 1.f / smlnum; + smlnum = SAFEMINIMUM; + bignum = 1. / smlnum; cfromc = *cfrom; ctoc = *cto; @@ -84117,11 +19388,11 @@ L30: L10: cfrom1 = cfromc * smlnum; cto1 = ctoc / bignum; - if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { + if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) 
{ mul = smlnum; done = FALSE_; cfromc = cfrom1; - } else if (dabs(cto1) > dabs(cfromc)) { + } else if (abs(cto1) > abs(cfromc)) { mul = bignum; done = FALSE_; ctoc = cto1; @@ -84230,7 +19501,7 @@ L10: k1 = *kl + *ku + 2; k2 = *kl + 1; - k3 = ((*kl) << (1)) + *ku + 1; + k3 = (*kl << 1) + *ku + 1; k4 = *kl + *ku + 1 + *m; i__1 = *n; for (j = 1; j <= i__1; ++j) { @@ -84254,13 +19525,14 @@ L10: return 0; -/* End of SLASCL */ +/* End of DLASCL */ -} /* slascl_ */ +} /* dlascl_ */ -/* Subroutine */ int slasd0_(integer *n, integer *sqre, real *d__, real *e, - real *u, integer *ldu, real *vt, integer *ldvt, integer *smlsiz, - integer *iwork, real *work, integer *info) +/* Subroutine */ int dlasd0_(integer *n, integer *sqre, doublereal *d__, + doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer * + ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer * + info) { /* System generated locals */ integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; @@ -84271,17 +19543,19 @@ L10: /* Local variables */ static integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, iwk, lvl, ndb1, nlp1, nrp1; - static real beta; + static doublereal beta; static integer idxq, nlvl; - static real alpha; + static doublereal alpha; static integer inode, ndiml, idxqc, ndimr, itemp, sqrei; - extern /* Subroutine */ int slasd1_(integer *, integer *, integer *, real - *, real *, real *, real *, integer *, real *, integer *, integer * - , integer *, real *, integer *), xerbla_(char *, integer *), slasdq_(char *, integer *, integer *, integer *, integer - *, integer *, real *, real *, real *, integer *, real *, integer * - , real *, integer *, real *, integer *), slasdt_(integer * - , integer *, integer *, integer *, integer *, integer *, integer * - ); + extern /* Subroutine */ int dlasd1_(integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, + integer 
*), dlasdq_(char *, integer *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlasdt_(integer *, integer *, + integer *, integer *, integer *, integer *, integer *), xerbla_( + char *, integer *); /* @@ -84294,13 +19568,13 @@ L10: Purpose ======= - Using a divide and conquer approach, SLASD0 computes the singular + Using a divide and conquer approach, DLASD0 computes the singular value decomposition (SVD) of a real upper bidiagonal N-by-M matrix B with diagonal D and offdiagonal E, where M = N + SQRE. The algorithm computes orthogonal matrices U and VT such that B = U * S * VT. The singular values S are overwritten on D. - A related subroutine, SLASDA, computes only the singular values, + A related subroutine, DLASDA, computes only the singular values, and optionally, the singular vectors in compact form. Arguments @@ -84315,22 +19589,22 @@ L10: = 0: The bidiagonal matrix has column dimension M = N; = 1: The bidiagonal matrix has column dimension M = N+1; - D (input/output) REAL array, dimension (N) + D (input/output) DOUBLE PRECISION array, dimension (N) On entry D contains the main diagonal of the bidiagonal matrix. On exit D, if INFO = 0, contains its singular values. - E (input) REAL array, dimension (M-1) + E (input) DOUBLE PRECISION array, dimension (M-1) Contains the subdiagonal entries of the bidiagonal matrix. On exit, E has been destroyed. - U (output) REAL array, dimension at least (LDQ, N) + U (output) DOUBLE PRECISION array, dimension at least (LDQ, N) On exit, U contains the left singular vectors. LDU (input) INTEGER On entry, leading dimension of U. - VT (output) REAL array, dimension at least (LDVT, M) + VT (output) DOUBLE PRECISION array, dimension at least (LDVT, M) On exit, VT' contains the right singular vectors. LDVT (input) INTEGER @@ -84343,7 +19617,7 @@ L10: IWORK INTEGER work array. 
Dimension must be at least (8 * N) - WORK REAL work array. + WORK DOUBLE PRECISION work array. Dimension must be at least (3 * M**2 + 2 * M) INFO (output) INTEGER @@ -84381,7 +19655,7 @@ L10: if (*n < 0) { *info = -1; - } else if ((*sqre < 0) || (*sqre > 1)) { + } else if (*sqre < 0 || *sqre > 1) { *info = -2; } @@ -84396,14 +19670,14 @@ L10: } if (*info != 0) { i__1 = -(*info); - xerbla_("SLASD0", &i__1); + xerbla_("DLASD0", &i__1); return 0; } -/* If the input matrix is too small, call SLASDQ to find the SVD. */ +/* If the input matrix is too small, call DLASDQ to find the SVD. */ if (*n <= *smlsiz) { - slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], + dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); return 0; } @@ -84415,12 +19689,12 @@ L10: ndimr = ndiml + *n; idxq = ndimr + *n; iwk = idxq + *n; - slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* For the nodes on bottom level of the tree, solve - their subproblems by SLASDQ. + their subproblems by DLASDQ. 
*/ ndb1 = (nd + 1) / 2; @@ -84445,7 +19719,7 @@ L10: nlf = ic - nl; nrf = ic + 1; sqrei = 1; - slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ + dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[ nlf + nlf * u_dim1], ldu, &work[1], info); if (*info != 0) { @@ -84463,7 +19737,7 @@ L10: sqrei = 1; } nrp1 = nr + sqrei; - slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ + dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[ nrf + nrf * u_dim1], ldu, &work[1], info); if (*info != 0) { @@ -84493,7 +19767,7 @@ L10: } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); - ll = ((lf) << (1)) - 1; + ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { @@ -84510,7 +19784,7 @@ L10: idxqc = idxq + nlf - 1; alpha = d__[ic]; beta = e[ic]; - slasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * + dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[ idxqc], &iwork[iwk], &work[1], info); if (*info != 0) { @@ -84523,36 +19797,38 @@ L10: return 0; -/* End of SLASD0 */ +/* End of DLASD0 */ -} /* slasd0_ */ +} /* dlasd0_ */ -/* Subroutine */ int slasd1_(integer *nl, integer *nr, integer *sqre, real * - d__, real *alpha, real *beta, real *u, integer *ldu, real *vt, - integer *ldvt, integer *idxq, integer *iwork, real *work, integer * - info) +/* Subroutine */ int dlasd1_(integer *nl, integer *nr, integer *sqre, + doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u, + integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer * + iwork, doublereal *work, integer *info) { /* System generated locals */ integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; - real r__1, r__2; + doublereal d__1, d__2; /* Local variables */ static integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, 
ldu2, ivt2, idxc, idxp, ldvt2; - extern /* Subroutine */ int slasd2_(integer *, integer *, integer *, - integer *, real *, real *, real *, real *, real *, integer *, - real *, integer *, real *, real *, integer *, real *, integer *, - integer *, integer *, integer *, integer *, integer *, integer *), - slasd3_(integer *, integer *, integer *, integer *, real *, real - *, integer *, real *, real *, integer *, real *, integer *, real * - , integer *, real *, integer *, integer *, integer *, real *, - integer *); + extern /* Subroutine */ int dlasd2_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *), dlasd3_( + integer *, integer *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + dlamrg_(integer *, integer *, doublereal *, integer *, integer *, + integer *); static integer isigma; - extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( - char *, integer *, integer *, real *, real *, integer *, integer * - , real *, integer *, integer *), slamrg_(integer *, - integer *, real *, integer *, integer *, integer *); - static real orgnrm; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal orgnrm; static integer coltyp; @@ -84566,13 +19842,13 @@ L10: Purpose ======= - SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, - where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0. 
+ DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, + where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. - A related subroutine SLASD7 handles the case in which the singular + A related subroutine DLASD7 handles the case in which the singular values (and the singular vectors in factored form) are desired. - SLASD1 computes the SVD as follows: + DLASD1 computes the SVD as follows: ( D1(in) 0 0 0 ) B = U(in) * ( Z1' a Z2' b ) * VT(in) @@ -84592,12 +19868,12 @@ L10: when there are multiple singular values or when there are zeros in the Z vector. For each such occurence the dimension of the secular equation problem is reduced by one. This stage is - performed by the routine SLASD2. + performed by the routine DLASD2. The second stage consists of calculating the updated singular values. This is done by finding the square roots of the - roots of the secular equation via the routine SLASD4 (as called - by SLASD3). This routine also calculates the singular vectors of + roots of the secular equation via the routine DLASD4 (as called + by DLASD3). This routine also calculates the singular vectors of the current problem. The final stage consists of computing the updated singular vectors @@ -84621,21 +19897,21 @@ L10: The bidiagonal matrix has row dimension N = NL + NR + 1, and column dimension M = N + SQRE. - D (input/output) REAL array, + D (input/output) DOUBLE PRECISION array, dimension (N = NL+NR+1). On entry D(1:NL,1:NL) contains the singular values of the upper block; and D(NL+2:N) contains the singular values of the lower block. On exit D(1:N) contains the singular values of the modified matrix. - ALPHA (input) REAL + ALPHA (input) DOUBLE PRECISION Contains the diagonal element associated with the added row. - BETA (input) REAL + BETA (input) DOUBLE PRECISION Contains the off-diagonal element associated with the added row. 
- U (input/output) REAL array, dimension(LDU,N) + U (input/output) DOUBLE PRECISION array, dimension(LDU,N) On entry U(1:NL, 1:NL) contains the left singular vectors of the upper block; U(NL+2:N, NL+2:N) contains the left singular vectors of the lower block. On exit U contains the left @@ -84644,7 +19920,7 @@ L10: LDU (input) INTEGER The leading dimension of the array U. LDU >= max( 1, N ). - VT (input/output) REAL array, dimension(LDVT,M) + VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) where M = N + SQRE. On entry VT(1:NL+1, 1:NL+1)' contains the right singular vectors of the upper block; VT(NL+2:M, NL+2:M)' contains @@ -84662,7 +19938,7 @@ L10: IWORK (workspace) INTEGER array, dimension( 4 * N ) - WORK (workspace) REAL array, dimension( 3*M**2 + 2*M ) + WORK (workspace) DOUBLE PRECISION array, dimension( 3*M**2 + 2*M ) INFO (output) INTEGER = 0: successful exit. @@ -84701,12 +19977,12 @@ L10: *info = -1; } else if (*nr < 1) { *info = -2; - } else if ((*sqre < 0) || (*sqre > 1)) { + } else if (*sqre < 0 || *sqre > 1) { *info = -3; } if (*info != 0) { i__1 = -(*info); - xerbla_("SLASD1", &i__1); + xerbla_("DLASD1", &i__1); return 0; } @@ -84716,7 +19992,7 @@ L10: /* The following values are for bookkeeping purposes only. They are integer pointers which indicate the portion of the workspace - used by a particular array in SLASD2 and SLASD3. + used by a particular array in DLASD2 and DLASD3. 
*/ ldu2 = n; @@ -84738,24 +20014,23 @@ L10: Computing MAX */ - r__1 = dabs(*alpha), r__2 = dabs(*beta); - orgnrm = dmax(r__1,r__2); - d__[*nl + 1] = 0.f; + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1,d__2); + d__[*nl + 1] = 0.; i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { - if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) { - orgnrm = (r__1 = d__[i__], dabs(r__1)); + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); } /* L10: */ } - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &n, &c__1, &d__[1], &n, - info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &n, &c__1, &d__[1], &n, info); *alpha /= orgnrm; *beta /= orgnrm; /* Deflate singular values. */ - slasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], + dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, & work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], & idxq[1], &iwork[coltyp], info); @@ -84763,7 +20038,7 @@ L10: /* Solve Secular Equation and update singular vectors. */ ldq = k; - slasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ + dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info); if (*info != 0) { @@ -84772,53 +20047,54 @@ L10: /* Unscale. */ - slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, &n, &c__1, &d__[1], &n, - info); + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &n, &c__1, &d__[1], &n, info); /* Prepare the IDXQ sorting permutation. 
*/ n1 = k; n2 = n - k; - slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); return 0; -/* End of SLASD1 */ +/* End of DLASD1 */ -} /* slasd1_ */ +} /* dlasd1_ */ -/* Subroutine */ int slasd2_(integer *nl, integer *nr, integer *sqre, integer - *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer * - ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2, - real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, - integer *idxq, integer *coltyp, integer *info) +/* Subroutine */ int dlasd2_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal * + beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, + doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, + integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer * + idxq, integer *coltyp, integer *info) { /* System generated locals */ integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1; - real r__1, r__2; + doublereal d__1, d__2; /* Local variables */ - static real c__; + static doublereal c__; static integer i__, j, m, n; - static real s; + static doublereal s; static integer k2; - static real z1; + static doublereal z1; static integer ct, jp; - static real eps, tau, tol; - static integer psm[4], nlp1, nlp2, idxi, idxj, ctot[4]; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *); - static integer idxjp, jprev; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + static doublereal eps, tau, tol; + static integer psm[4], nlp1, nlp2, idxi, idxj; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer ctot[4], idxjp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + 
static integer jprev; + + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern doublereal slapy2_(real *, real *), slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( - integer *, integer *, real *, integer *, integer *, integer *); - static real hlftol; - extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, - integer *, real *, integer *), slaset_(char *, integer *, - integer *, real *, real *, real *, integer *); + static doublereal hlftol; /* @@ -84831,14 +20107,14 @@ L10: Purpose ======= - SLASD2 merges the two sets of singular values together into a single + DLASD2 merges the two sets of singular values together into a single sorted set. Then it tries to deflate the size of the problem. There are two ways in which deflation can occur: when two or more singular values are close together or if there is a tiny entry in the Z vector. For each such occurrence the order of the related secular equation problem is reduced by one. - SLASD2 is called from SLASD1. + DLASD2 is called from DLASD1. Arguments ========= @@ -84860,20 +20136,20 @@ L10: Contains the dimension of the non-deflated matrix, This is the order of the related secular equation. 1 <= K <=N. - D (input/output) REAL array, dimension(N) + D (input/output) DOUBLE PRECISION array, dimension(N) On entry D contains the singular values of the two submatrices to be combined. On exit D contains the trailing (N-K) updated singular values (those which were deflated) sorted into increasing order. - ALPHA (input) REAL + ALPHA (input) DOUBLE PRECISION Contains the diagonal element associated with the added row. 
- BETA (input) REAL + BETA (input) DOUBLE PRECISION Contains the off-diagonal element associated with the added row. - U (input/output) REAL array, dimension(LDU,N) + U (input/output) DOUBLE PRECISION array, dimension(LDU,N) On entry U contains the left singular vectors of two submatrices in the two square blocks with corners at (1,1), (NL, NL), and (NL+2, NL+2), (N,N). @@ -84883,17 +20159,17 @@ L10: LDU (input) INTEGER The leading dimension of the array U. LDU >= N. - Z (output) REAL array, dimension(N) + Z (output) DOUBLE PRECISION array, dimension(N) On exit Z contains the updating row vector in the secular equation. - DSIGMA (output) REAL array, dimension (N) + DSIGMA (output) DOUBLE PRECISION array, dimension (N) Contains a copy of the diagonal elements (K-1 singular values and one zero) in the secular equation. - U2 (output) REAL array, dimension(LDU2,N) + U2 (output) DOUBLE PRECISION array, dimension(LDU2,N) Contains a copy of the first K-1 left singular vectors which - will be used by SLASD3 in a matrix multiply (SGEMM) to solve + will be used by DLASD3 in a matrix multiply (DGEMM) to solve for the new left singular vectors. U2 is arranged into four blocks. The first block contains a column with 1 at NL+1 and zero everywhere else; the second block contains non-zero @@ -84903,7 +20179,7 @@ L10: LDU2 (input) INTEGER The leading dimension of the array U2. LDU2 >= N. - VT (input/output) REAL array, dimension(LDVT,M) + VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) On entry VT' contains the right singular vectors of two submatrices in the two square blocks with corners at (1,1), (NL+1, NL+1), and (NL+2, NL+2), (M,M). @@ -84915,9 +20191,9 @@ L10: LDVT (input) INTEGER The leading dimension of the array VT. LDVT >= M. 
- VT2 (output) REAL array, dimension(LDVT2,N) + VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N) VT2' contains a copy of the first K right singular vectors - which will be used by SLASD3 in a matrix multiply (SGEMM) to + which will be used by DLASD3 in a matrix multiply (DGEMM) to solve for the new right singular vectors. VT2 is arranged into three blocks. The first block contains a row that corresponds to the special 0 diagonal element in SIGMA; the second block @@ -85027,7 +20303,7 @@ L10: } if (*info != 0) { i__1 = -(*info); - xerbla_("SLASD2", &i__1); + xerbla_("DLASD2", &i__1); return 0; } @@ -85090,7 +20366,7 @@ L10: /* L60: */ } - slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { @@ -85103,13 +20379,13 @@ L10: /* Calculate the allowable deflation tolerance */ - eps = slamch_("Epsilon"); + eps = EPSILON; /* Computing MAX */ - r__1 = dabs(*alpha), r__2 = dabs(*beta); - tol = dmax(r__1,r__2); + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1,d__2); /* Computing MAX */ - r__2 = (r__1 = d__[n], dabs(r__1)); - tol = eps * 8.f * dmax(r__2,tol); + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 8. * max(d__2,tol); /* There are 2 kinds of deflation -- first a value in the z-vector @@ -85136,7 +20412,7 @@ L10: k2 = n + 1; i__1 = n; for (j = 2; j <= i__1; ++j) { - if ((r__1 = z__[j], dabs(r__1)) <= tol) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { /* Deflate due to small z component. */ @@ -85159,7 +20435,7 @@ L100: if (j > n) { goto L110; } - if ((r__1 = z__[j], dabs(r__1)) <= tol) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { /* Deflate due to small z component. */ @@ -85170,7 +20446,7 @@ L100: /* Check if singular values are close enough to allow deflation. */ - if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) { + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { /* Deflation is possible. 
*/ @@ -85182,11 +20458,11 @@ L100: destructive underflow. */ - tau = slapy2_(&c__, &s); + tau = dlapy2_(&c__, &s); c__ /= tau; s = -s / tau; z__[j] = tau; - z__[jprev] = 0.f; + z__[jprev] = 0.; /* Apply back the Givens rotation to the left and right @@ -85201,9 +20477,9 @@ L100: if (idxj <= nlp1) { --idxj; } - srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & + drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & c__1, &c__, &s); - srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & + drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & c__, &s); if (coltyp[j] != coltyp[jprev]) { coltyp[j] = 3; @@ -85290,30 +20566,30 @@ L120: if (idxj <= nlp1) { --idxj; } - scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); - scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); + dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); + dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); /* L160: */ } /* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ - dsigma[1] = 0.f; - hlftol = tol / 2.f; - if (dabs(dsigma[2]) <= hlftol) { + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { dsigma[2] = hlftol; } if (m > n) { - z__[1] = slapy2_(&z1, &z__[m]); + z__[1] = dlapy2_(&z1, &z__[m]); if (z__[1] <= tol) { - c__ = 1.f; - s = 0.f; + c__ = 1.; + s = 0.; z__[1] = tol; } else { c__ = z1 / z__[1]; s = z__[m] / z__[1]; } } else { - if (dabs(z1) <= tol) { + if (abs(z1) <= tol) { z__[1] = tol; } else { z__[1] = z1; @@ -85323,16 +20599,15 @@ L120: /* Move the rest of the updating row to Z. */ i__1 = *k - 1; - scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); + dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); /* Determine the first column of U2, the first row of VT2 and the last row of VT. 
*/ - slaset_("A", &n, &c__1, &c_b320, &c_b320, &u2[u2_offset], ldu2) - ; - u2[nlp1 + u2_dim1] = 1.f; + dlaset_("A", &n, &c__1, &c_b29, &c_b29, &u2[u2_offset], ldu2); + u2[nlp1 + u2_dim1] = 1.; if (m > n) { i__1 = nlp1; for (i__ = 1; i__ <= i__1; ++i__) { @@ -85347,10 +20622,10 @@ L120: /* L180: */ } } else { - scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); + dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); } if (m > n) { - scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); + dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); } /* @@ -85360,16 +20635,16 @@ L120: if (n > *k) { i__1 = n - *k; - scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); i__1 = n - *k; - slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) + dlacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) * u_dim1 + 1], ldu); i__1 = n - *k; - slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + + dlacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + vt_dim1], ldvt); } -/* Copy CTOT into COLTYP for referencing in SLASD3. */ +/* Copy CTOT into COLTYP for referencing in DLASD3. 
*/ for (j = 1; j <= 4; ++j) { coltyp[j] = ctot[j - 1]; @@ -85378,43 +20653,45 @@ L120: return 0; -/* End of SLASD2 */ +/* End of DLASD2 */ -} /* slasd2_ */ +} /* dlasd2_ */ -/* Subroutine */ int slasd3_(integer *nl, integer *nr, integer *sqre, integer - *k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer * - ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2, - integer *ldvt2, integer *idxc, integer *ctot, real *z__, integer * - info) +/* Subroutine */ int dlasd3_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma, + doublereal *u, integer *ldu, doublereal *u2, integer *ldu2, + doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, + integer *idxc, integer *ctot, doublereal *z__, integer *info) { /* System generated locals */ integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1, i__2; - real r__1, r__2; + doublereal d__1, d__2; /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); + double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ static integer i__, j, m, n, jc; - static real rho; + static doublereal rho; static integer nlp1, nlp2, nrp1; - static real temp; - extern doublereal snrm2_(integer *, real *, integer *); + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); static integer ctemp; - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); static integer ktemp; - extern /* Subroutine */ int scopy_(integer *, 
real *, integer *, real *, - integer *); - extern doublereal slamc3_(real *, real *); - extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, - real *, real *, real *, real *, integer *), xerbla_(char *, - integer *), slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, - real *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + xerbla_(char *, integer *); /* @@ -85427,9 +20704,9 @@ L120: Purpose ======= - SLASD3 finds all the square roots of the roots of the secular + DLASD3 finds all the square roots of the roots of the secular equation, as defined by the values in D and Z. It makes the - appropriate calls to SLASD4 and then updates the singular + appropriate calls to DLASD4 and then updates the singular vectors by matrix multiplication. This code makes very mild assumptions about floating point @@ -85439,7 +20716,7 @@ L120: It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. - SLASD3 is called from SLASD1. + DLASD3 is called from DLASD1. Arguments ========= @@ -85460,43 +20737,43 @@ L120: K (input) INTEGER The size of the secular equation, 1 =< K = < N. - D (output) REAL array, dimension(K) + D (output) DOUBLE PRECISION array, dimension(K) On exit the square roots of the roots of the secular equation, in ascending order. - Q (workspace) REAL array, + Q (workspace) DOUBLE PRECISION array, dimension at least (LDQ,K). LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= K. 
- DSIGMA (input) REAL array, dimension(K) + DSIGMA (input) DOUBLE PRECISION array, dimension(K) The first K elements of this array contain the old roots of the deflated updating problem. These are the poles of the secular equation. - U (input) REAL array, dimension (LDU, N) + U (input) DOUBLE PRECISION array, dimension (LDU, N) The last N - K columns of this matrix contain the deflated left singular vectors. LDU (input) INTEGER The leading dimension of the array U. LDU >= N. - U2 (input) REAL array, dimension (LDU2, N) + U2 (input) DOUBLE PRECISION array, dimension (LDU2, N) The first K columns of this matrix contain the non-deflated left singular vectors for the split problem. LDU2 (input) INTEGER The leading dimension of the array U2. LDU2 >= N. - VT (input) REAL array, dimension (LDVT, M) + VT (input) DOUBLE PRECISION array, dimension (LDVT, M) The last M - K columns of VT' contain the deflated right singular vectors. LDVT (input) INTEGER The leading dimension of the array VT. LDVT >= N. - VT2 (input) REAL array, dimension (LDVT2, N) + VT2 (input) DOUBLE PRECISION array, dimension (LDVT2, N) The first K columns of VT2' contain the non-deflated right singular vectors for the split problem. @@ -85511,7 +20788,7 @@ L120: and the third is dense. The first column of U and the row of VT are treated separately, however. - The rows of the singular vectors found by SLASD4 + The rows of the singular vectors found by DLASD4 must be likewise permuted before the matrix multiplies can take place. @@ -85520,7 +20797,7 @@ L120: in U (or rows in VT), as described in IDXC. The fourth column type is any column which has been deflated. - Z (input) REAL array, dimension (K) + Z (input) DOUBLE PRECISION array, dimension (K) The first K elements of this array contain the components of the deflation-adjusted updating row vector. 
@@ -85580,7 +20857,7 @@ L120: nlp1 = *nl + 1; nlp2 = *nl + 2; - if ((*k < 1) || (*k > n)) { + if (*k < 1 || *k > n) { *info = -4; } else if (*ldq < *k) { *info = -7; @@ -85595,17 +20872,17 @@ L120: } if (*info != 0) { i__1 = -(*info); - xerbla_("SLASD3", &i__1); + xerbla_("DLASD3", &i__1); return 0; } /* Quick return if possible */ if (*k == 1) { - d__[1] = dabs(z__[1]); - scopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); - if (z__[1] > 0.f) { - scopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + d__[1] = abs(z__[1]); + dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); + if (z__[1] > 0.) { + dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); } else { i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { @@ -85637,25 +20914,25 @@ L120: i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { - dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; + dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; /* L20: */ } /* Keep a copy of Z. */ - scopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); + dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); /* Normalize Z. */ - rho = snrm2_(k, &z__[1], &c__1); - slascl_("G", &c__0, &c__0, &rho, &c_b1011, k, &c__1, &z__[1], k, info); + rho = dnrm2_(k, &z__[1], &c__1); + dlascl_("G", &c__0, &c__0, &rho, &c_b15, k, &c__1, &z__[1], k, info); rho *= rho; /* Find the new singular values. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - slasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], + dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], &vt[j * vt_dim1 + 1], info); /* If the zero finder fails, the computation is terminated. 
*/ @@ -85683,8 +20960,8 @@ L120: i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); /* L50: */ } - r__2 = sqrt((r__1 = z__[i__], dabs(r__1))); - z__[i__] = r_sign(&r__2, &q[i__ + q_dim1]); + d__2 = sqrt((d__1 = z__[i__], abs(d__1))); + z__[i__] = d_sign(&d__2, &q[i__ + q_dim1]); /* L60: */ } @@ -85697,7 +20974,7 @@ L120: for (i__ = 1; i__ <= i__1; ++i__) { vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * vt_dim1 + 1]; - u[i__ * u_dim1 + 1] = -1.f; + u[i__ * u_dim1 + 1] = -1.; i__2 = *k; for (j = 2; j <= i__2; ++j) { vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ @@ -85705,7 +20982,7 @@ L120: u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; /* L70: */ } - temp = snrm2_(k, &u[i__ * u_dim1 + 1], &c__1); + temp = dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1); q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; i__2 = *k; for (j = 2; j <= i__2; ++j) { @@ -85719,38 +20996,38 @@ L120: /* Update the left singular vector matrix. */ if (*k == 2) { - sgemm_("N", "N", &n, k, k, &c_b1011, &u2[u2_offset], ldu2, &q[ - q_offset], ldq, &c_b320, &u[u_offset], ldu); + dgemm_("N", "N", &n, k, k, &c_b15, &u2[u2_offset], ldu2, &q[q_offset], + ldq, &c_b29, &u[u_offset], ldu); goto L100; } if (ctot[1] > 0) { - sgemm_("N", "N", nl, k, &ctot[1], &c_b1011, &u2[((u2_dim1) << (1)) + - 1], ldu2, &q[q_dim1 + 2], ldq, &c_b320, &u[u_dim1 + 1], ldu); + dgemm_("N", "N", nl, k, &ctot[1], &c_b15, &u2[(u2_dim1 << 1) + 1], + ldu2, &q[q_dim1 + 2], ldq, &c_b29, &u[u_dim1 + 1], ldu); if (ctot[3] > 0) { ktemp = ctot[1] + 2 + ctot[2]; - sgemm_("N", "N", nl, k, &ctot[3], &c_b1011, &u2[ktemp * u2_dim1 + - 1], ldu2, &q[ktemp + q_dim1], ldq, &c_b1011, &u[u_dim1 + - 1], ldu); + dgemm_("N", "N", nl, k, &ctot[3], &c_b15, &u2[ktemp * u2_dim1 + 1] + , ldu2, &q[ktemp + q_dim1], ldq, &c_b15, &u[u_dim1 + 1], + ldu); } } else if (ctot[3] > 0) { ktemp = ctot[1] + 2 + ctot[2]; - sgemm_("N", "N", nl, k, &ctot[3], &c_b1011, &u2[ktemp * u2_dim1 + 1], - ldu2, &q[ktemp + q_dim1], ldq, 
&c_b320, &u[u_dim1 + 1], ldu); + dgemm_("N", "N", nl, k, &ctot[3], &c_b15, &u2[ktemp * u2_dim1 + 1], + ldu2, &q[ktemp + q_dim1], ldq, &c_b29, &u[u_dim1 + 1], ldu); } else { - slacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); + dlacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); } - scopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); + dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); ktemp = ctot[1] + 2; ctemp = ctot[2] + ctot[3]; - sgemm_("N", "N", nr, k, &ctemp, &c_b1011, &u2[nlp2 + ktemp * u2_dim1], - ldu2, &q[ktemp + q_dim1], ldq, &c_b320, &u[nlp2 + u_dim1], ldu); + dgemm_("N", "N", nr, k, &ctemp, &c_b15, &u2[nlp2 + ktemp * u2_dim1], ldu2, + &q[ktemp + q_dim1], ldq, &c_b29, &u[nlp2 + u_dim1], ldu); /* Generate the right singular vectors. */ L100: i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { - temp = snrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); + temp = dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; i__2 = *k; for (j = 2; j <= i__2; ++j) { @@ -85764,18 +21041,18 @@ L100: /* Update the right singular vector matrix. 
*/ if (*k == 2) { - sgemm_("N", "N", k, &m, k, &c_b1011, &q[q_offset], ldq, &vt2[ - vt2_offset], ldvt2, &c_b320, &vt[vt_offset], ldvt); + dgemm_("N", "N", k, &m, k, &c_b15, &q[q_offset], ldq, &vt2[vt2_offset] + , ldvt2, &c_b29, &vt[vt_offset], ldvt); return 0; } ktemp = ctot[1] + 1; - sgemm_("N", "N", k, &nlp1, &ktemp, &c_b1011, &q[q_dim1 + 1], ldq, &vt2[ - vt2_dim1 + 1], ldvt2, &c_b320, &vt[vt_dim1 + 1], ldvt); + dgemm_("N", "N", k, &nlp1, &ktemp, &c_b15, &q[q_dim1 + 1], ldq, &vt2[ + vt2_dim1 + 1], ldvt2, &c_b29, &vt[vt_dim1 + 1], ldvt); ktemp = ctot[1] + 2 + ctot[2]; if (ktemp <= *ldvt2) { - sgemm_("N", "N", k, &nlp1, &ctot[3], &c_b1011, &q[ktemp * q_dim1 + 1], - ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b1011, &vt[vt_dim1 + - 1], ldvt); + dgemm_("N", "N", k, &nlp1, &ctot[3], &c_b15, &q[ktemp * q_dim1 + 1], + ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b15, &vt[vt_dim1 + 1], + ldvt); } ktemp = ctot[1] + 1; @@ -85793,52 +21070,53 @@ L100: } } ctemp = ctot[2] + 1 + ctot[3]; - sgemm_("N", "N", k, &nrp1, &ctemp, &c_b1011, &q[ktemp * q_dim1 + 1], ldq, - &vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b320, &vt[nlp2 * vt_dim1 - + 1], ldvt); + dgemm_("N", "N", k, &nrp1, &ctemp, &c_b15, &q[ktemp * q_dim1 + 1], ldq, & + vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b29, &vt[nlp2 * vt_dim1 + + 1], ldvt); return 0; -/* End of SLASD3 */ +/* End of DLASD3 */ -} /* slasd3_ */ +} /* dlasd3_ */ -/* Subroutine */ int slasd4_(integer *n, integer *i__, real *d__, real *z__, - real *delta, real *rho, real *sigma, real *work, integer *info) +/* Subroutine */ int dlasd4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal * + sigma, doublereal *work, integer *info) { /* System generated locals */ integer i__1; - real r__1; + doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static real a, b, c__; + static doublereal a, b, c__; static integer j; - static real w, dd[3]; + static doublereal w, dd[3]; static 
integer ii; - static real dw, zz[3]; + static doublereal dw, zz[3]; static integer ip1; - static real eta, phi, eps, tau, psi; + static doublereal eta, phi, eps, tau, psi; static integer iim1, iip1; - static real dphi, dpsi; + static doublereal dphi, dpsi; static integer iter; - static real temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, dtiip; + static doublereal temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, + dtiip; static integer niter; - static real dtisq; + static doublereal dtisq; static logical swtch; - static real dtnsq; - extern /* Subroutine */ int slaed6_(integer *, logical *, real *, real *, - real *, real *, real *, integer *); - static real delsq2; - extern /* Subroutine */ int slasd5_(integer *, real *, real *, real *, - real *, real *, real *); - static real dtnsq1; + static doublereal dtnsq; + extern /* Subroutine */ int dlaed6_(integer *, logical *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *) + , dlasd5_(integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + static doublereal delsq2, dtnsq1; static logical swtch3; - extern doublereal slamch_(char *); + static logical orgati; - static real erretm, dtipsq, rhoinv; + static doublereal erretm, dtipsq, rhoinv; /* @@ -85877,26 +21155,26 @@ L100: I (input) INTEGER The index of the eigenvalue to be computed. 1 <= I <= N. - D (input) REAL array, dimension ( N ) + D (input) DOUBLE PRECISION array, dimension ( N ) The original eigenvalues. It is assumed that they are in order, 0 <= D(I) < D(J) for I < J. - Z (input) REAL array, dimension ( N ) + Z (input) DOUBLE PRECISION array, dimension ( N ) The components of the updating vector. - DELTA (output) REAL array, dimension ( N ) + DELTA (output) DOUBLE PRECISION array, dimension ( N ) If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th component. If N = 1, then DELTA(1) = 1. 
The vector DELTA contains the information necessary to construct the (singular) eigenvectors. - RHO (input) REAL + RHO (input) DOUBLE PRECISION The scalar in the symmetric updating formula. - SIGMA (output) REAL + SIGMA (output) DOUBLE PRECISION The computed lambda_I, the I-th updated eigenvalue. - WORK (workspace) REAL array, dimension ( N ) + WORK (workspace) DOUBLE PRECISION array, dimension ( N ) If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th component. If N = 1, then WORK( 1 ) = 1. @@ -85948,19 +21226,19 @@ L100: /* Presumably, I=1 upon entry */ *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]); - delta[1] = 1.f; - work[1] = 1.f; + delta[1] = 1.; + work[1] = 1.; return 0; } if (*n == 2) { - slasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); + dlasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); return 0; } /* Compute machine epsilon */ - eps = slamch_("Epsilon"); - rhoinv = 1.f / *rho; + eps = EPSILON; + rhoinv = 1. / *rho; /* The case I = N */ @@ -85973,7 +21251,7 @@ L100: /* Calculate initial guess */ - temp = *rho / 2.f; + temp = *rho / 2.; /* If ||Z||_2 is not one, then TEMP should be set to @@ -85988,7 +21266,7 @@ L100: /* L10: */ } - psi = 0.f; + psi = 0.; i__1 = *n - 2; for (j = 1; j <= i__1; ++j) { psi += z__[j] * z__[j] / (delta[j] * work[j]); @@ -85999,7 +21277,7 @@ L100: w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[* n] / (delta[*n] * work[*n]); - if (w <= 0.f) { + if (w <= 0.) { temp1 = sqrt(d__[*n] * d__[*n] + *rho); temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[* n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * @@ -86017,10 +21295,10 @@ L100: a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[* n]; b = z__[*n] * z__[*n] * delsq; - if (a < 0.f) { - tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. 
* c__) - a); } else { - tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); } } @@ -86039,10 +21317,10 @@ L100: SIGMA_n^2 - D( N )*D( N ) */ - if (a < 0.f) { - tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); } else { - tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); } /* @@ -86066,9 +21344,9 @@ L100: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; + dpsi = 0.; + psi = 0.; + erretm = 0.; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (delta[j] * work[j]); @@ -86077,21 +21355,21 @@ L100: erretm += psi; /* L40: */ } - erretm = dabs(erretm); + erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / (delta[*n] * work[*n]); phi = z__[*n] * temp; dphi = temp * temp; - erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( - dpsi + dphi); + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); w = rhoinv + phi + psi; /* Test for convergence */ - if (dabs(w) <= eps * erretm) { + if (abs(w) <= eps * erretm) { goto L240; } @@ -86103,17 +21381,17 @@ L100: c__ = w - dtnsq1 * dpsi - dtnsq * dphi; a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi); b = dtnsq * dtnsq1 * w; - if (c__ < 0.f) { - c__ = dabs(c__); + if (c__ < 0.) { + c__ = abs(c__); } - if (c__ == 0.f) { + if (c__ == 0.) { eta = *rho - *sigma * *sigma; - } else if (a >= 0.f) { - eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( - c__ * 2.f); + } else if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); } else { - eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); } /* @@ -86124,7 +21402,7 @@ L100: will guarantee eta*w < 0. 
*/ - if (w * eta > 0.f) { + if (w * eta > 0.) { eta = -w / (dpsi + dphi); } temp = eta - dtnsq; @@ -86145,9 +21423,9 @@ L100: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; + dpsi = 0.; + psi = 0.; + erretm = 0.; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); @@ -86156,15 +21434,15 @@ L100: erretm += psi; /* L60: */ } - erretm = dabs(erretm); + erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / (work[*n] * delta[*n]); phi = z__[*n] * temp; dphi = temp * temp; - erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( - dpsi + dphi); + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); w = rhoinv + phi + psi; @@ -86176,7 +21454,7 @@ L100: /* Test for convergence */ - if (dabs(w) <= eps * erretm) { + if (abs(w) <= eps * erretm) { goto L240; } @@ -86187,12 +21465,12 @@ L100: c__ = w - dtnsq1 * dpsi - dtnsq * dphi; a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi); b = dtnsq1 * dtnsq * w; - if (a >= 0.f) { - eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / - (c__ * 2.f); + if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); } else { - eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); } /* @@ -86203,12 +21481,12 @@ L100: will guarantee eta*w < 0. */ - if (w * eta > 0.f) { + if (w * eta > 0.) { eta = -w / (dpsi + dphi); } temp = eta - dtnsq; - if (temp <= 0.f) { - eta /= 2.f; + if (temp <= 0.) 
{ + eta /= 2.; } tau += eta; @@ -86224,9 +21502,9 @@ L100: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; + dpsi = 0.; + psi = 0.; + erretm = 0.; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); @@ -86235,15 +21513,15 @@ L100: erretm += psi; /* L80: */ } - erretm = dabs(erretm); + erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / (work[*n] * delta[*n]); phi = z__[*n] * temp; dphi = temp * temp; - erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * - (dpsi + dphi); + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( + dpsi + dphi); w = rhoinv + phi + psi; /* L90: */ @@ -86266,7 +21544,7 @@ L100: /* Calculate initial guess */ delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]); - delsq2 = delsq / 2.f; + delsq2 = delsq / 2.; temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2)); i__1 = *n; for (j = 1; j <= i__1; ++j) { @@ -86275,14 +21553,14 @@ L100: /* L100: */ } - psi = 0.f; + psi = 0.; i__1 = *i__ - 1; for (j = 1; j <= i__1; ++j) { psi += z__[j] * z__[j] / (work[j] * delta[j]); /* L110: */ } - phi = 0.f; + phi = 0.; i__1 = *i__ + 2; for (j = *n; j >= i__1; --j) { phi += z__[j] * z__[j] / (work[j] * delta[j]); @@ -86292,7 +21570,7 @@ L100: w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[ ip1] * z__[ip1] / (work[ip1] * delta[ip1]); - if (w > 0.f) { + if (w > 0.) { /* d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 @@ -86301,16 +21579,16 @@ L100: */ orgati = TRUE_; - sg2lb = 0.f; + sg2lb = 0.; sg2ub = delsq2; a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; b = z__[*i__] * z__[*i__] * delsq; - if (a > 0.f) { - tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); + if (a > 0.) { + tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. 
* c__, abs( + d__1)))); } else { - tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / - (c__ * 2.f); + tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); } /* @@ -86330,15 +21608,15 @@ L100: orgati = FALSE_; sg2lb = -delsq2; - sg2ub = 0.f; + sg2ub = 0.; a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; b = z__[ip1] * z__[ip1] * delsq; - if (a < 0.f) { - tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs( - r__1)))); + if (a < 0.) { + tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( + d__1)))); } else { - tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1)))) - / (c__ * 2.f); + tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / + (c__ * 2.); } /* @@ -86347,8 +21625,8 @@ L100: SIGMA - D( IP1 ). */ - eta = tau / (d__[ip1] + sqrt((r__1 = d__[ip1] * d__[ip1] + tau, - dabs(r__1)))); + eta = tau / (d__[ip1] + sqrt((d__1 = d__[ip1] * d__[ip1] + tau, + abs(d__1)))); } if (orgati) { @@ -86375,9 +21653,9 @@ L100: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; + dpsi = 0.; + psi = 0.; + erretm = 0.; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); @@ -86386,12 +21664,12 @@ L100: erretm += psi; /* L150: */ } - erretm = dabs(erretm); + erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ - dphi = 0.f; - phi = 0.f; + dphi = 0.; + phi = 0.; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / (work[j] * delta[j]); @@ -86410,15 +21688,15 @@ L100: swtch3 = FALSE_; if (orgati) { - if (w < 0.f) { + if (w < 0.) { swtch3 = TRUE_; } } else { - if (w > 0.f) { + if (w > 0.) { swtch3 = TRUE_; } } - if ((ii == 1) || (ii == *n)) { + if (ii == 1 || ii == *n) { swtch3 = FALSE_; } @@ -86426,19 +21704,19 @@ L100: dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w += temp; - erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f - + dabs(tau) * dw; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. 
+ abs(temp) * 3. + + abs(tau) * dw; /* Test for convergence */ - if (dabs(w) <= eps * erretm) { + if (abs(w) <= eps * erretm) { goto L240; } - if (w <= 0.f) { - sg2lb = dmax(sg2lb,tau); + if (w <= 0.) { + sg2lb = max(sg2lb,tau); } else { - sg2ub = dmin(sg2ub,tau); + sg2ub = min(sg2ub,tau); } /* Calculate the new step */ @@ -86449,17 +21727,17 @@ L100: dtisq = work[*i__] * delta[*i__]; if (orgati) { /* Computing 2nd power */ - r__1 = z__[*i__] / dtisq; - c__ = w - dtipsq * dw + delsq * (r__1 * r__1); + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); } else { /* Computing 2nd power */ - r__1 = z__[ip1] / dtipsq; - c__ = w - dtisq * dw - delsq * (r__1 * r__1); + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); } a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; b = dtipsq * dtisq * w; - if (c__ == 0.f) { - if (a == 0.f) { + if (c__ == 0.) { + if (a == 0.) { if (orgati) { a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + dphi); @@ -86469,12 +21747,12 @@ L100: } } eta = b / a; - } else if (a <= 0.f) { - eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / - (c__ * 2.f); + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); } else { - eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( - r__1)))); + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); } } else { @@ -86510,7 +21788,7 @@ L100: dd[0] = dtiim; dd[1] = delta[ii] * work[ii]; dd[2] = dtiip; - slaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); if (*info != 0) { goto L240; } @@ -86524,7 +21802,7 @@ L100: will guarantee eta*w < 0. */ - if (w * eta >= 0.f) { + if (w * eta >= 0.) 
{ eta = -w / dw; } if (orgati) { @@ -86534,11 +21812,11 @@ L100: temp1 = work[ip1] * delta[ip1]; temp = eta - temp1; } - if ((temp > sg2ub) || (temp < sg2lb)) { - if (w < 0.f) { - eta = (sg2ub - tau) / 2.f; + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.) { + eta = (sg2ub - tau) / 2.; } else { - eta = (sg2lb - tau) / 2.f; + eta = (sg2lb - tau) / 2.; } } @@ -86557,9 +21835,9 @@ L100: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; + dpsi = 0.; + psi = 0.; + erretm = 0.; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); @@ -86568,12 +21846,12 @@ L100: erretm += psi; /* L180: */ } - erretm = dabs(erretm); + erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ - dphi = 0.f; - phi = 0.f; + dphi = 0.; + phi = 0.; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / (work[j] * delta[j]); @@ -86587,22 +21865,22 @@ L100: dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f - + dabs(tau) * dw; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; - if (w <= 0.f) { - sg2lb = dmax(sg2lb,tau); + if (w <= 0.) { + sg2lb = max(sg2lb,tau); } else { - sg2ub = dmin(sg2ub,tau); + sg2ub = min(sg2ub,tau); } swtch = FALSE_; if (orgati) { - if (-w > dabs(prew) / 10.f) { + if (-w > abs(prew) / 10.) { swtch = TRUE_; } } else { - if (w > dabs(prew) / 10.f) { + if (w > abs(prew) / 10.) { swtch = TRUE_; } } @@ -86615,7 +21893,7 @@ L100: /* Test for convergence */ - if (dabs(w) <= eps * erretm) { + if (abs(w) <= eps * erretm) { goto L240; } @@ -86627,12 +21905,12 @@ L100: if (! 
swtch) { if (orgati) { /* Computing 2nd power */ - r__1 = z__[*i__] / dtisq; - c__ = w - dtipsq * dw + delsq * (r__1 * r__1); + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); } else { /* Computing 2nd power */ - r__1 = z__[ip1] / dtipsq; - c__ = w - dtisq * dw - delsq * (r__1 * r__1); + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); } } else { temp = z__[ii] / (work[ii] * delta[ii]); @@ -86645,8 +21923,8 @@ L100: } a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; b = dtipsq * dtisq * w; - if (c__ == 0.f) { - if (a == 0.f) { + if (c__ == 0.) { + if (a == 0.) { if (! swtch) { if (orgati) { a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * @@ -86660,12 +21938,12 @@ L100: } } eta = b / a; - } else if (a <= 0.f) { - eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)) - )) / (c__ * 2.f); + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) + / (c__ * 2.); } else { - eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, - dabs(r__1)))); + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, + abs(d__1)))); } } else { @@ -86708,7 +21986,7 @@ L100: dd[0] = dtiim; dd[1] = delta[ii] * work[ii]; dd[2] = dtiip; - slaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); if (*info != 0) { goto L240; } @@ -86722,7 +22000,7 @@ L100: will guarantee eta*w < 0. */ - if (w * eta >= 0.f) { + if (w * eta >= 0.) { eta = -w / dw; } if (orgati) { @@ -86732,11 +22010,11 @@ L100: temp1 = work[ip1] * delta[ip1]; temp = eta - temp1; } - if ((temp > sg2ub) || (temp < sg2lb)) { - if (w < 0.f) { - eta = (sg2ub - tau) / 2.f; + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.) 
{ + eta = (sg2ub - tau) / 2.; } else { - eta = (sg2lb - tau) / 2.f; + eta = (sg2lb - tau) / 2.; } } @@ -86755,9 +22033,9 @@ L100: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.f; - psi = 0.f; - erretm = 0.f; + dpsi = 0.; + psi = 0.; + erretm = 0.; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); @@ -86766,12 +22044,12 @@ L100: erretm += psi; /* L210: */ } - erretm = dabs(erretm); + erretm = abs(erretm); /* Evaluate PHI and the derivative DPHI */ - dphi = 0.f; - phi = 0.f; + dphi = 0.; + phi = 0.; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / (work[j] * delta[j]); @@ -86785,16 +22063,16 @@ L100: dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * - 3.f + dabs(tau) * dw; - if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) { + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + if (w * prew > 0. && abs(w) > abs(prew) / 10.) { swtch = ! swtch; } - if (w <= 0.f) { - sg2lb = dmax(sg2lb,tau); + if (w <= 0.) { + sg2lb = max(sg2lb,tau); } else { - sg2ub = dmin(sg2ub,tau); + sg2ub = min(sg2ub,tau); } /* L230: */ @@ -86809,21 +22087,22 @@ L100: L240: return 0; -/* End of SLASD4 */ +/* End of DLASD4 */ -} /* slasd4_ */ +} /* dlasd4_ */ -/* Subroutine */ int slasd5_(integer *i__, real *d__, real *z__, real *delta, - real *rho, real *dsigma, real *work) +/* Subroutine */ int dlasd5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * + work) { /* System generated locals */ - real r__1; + doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static real b, c__, w, del, tau, delsq; + static doublereal b, c__, w, del, tau, delsq; /* @@ -86855,24 +22134,24 @@ L240: I (input) INTEGER The index of the eigenvalue to be computed. I = 1 or I = 2. 
- D (input) REAL array, dimension ( 2 ) + D (input) DOUBLE PRECISION array, dimension ( 2 ) The original eigenvalues. We assume 0 <= D(1) < D(2). - Z (input) REAL array, dimension ( 2 ) + Z (input) DOUBLE PRECISION array, dimension ( 2 ) The components of the updating vector. - DELTA (output) REAL array, dimension ( 2 ) + DELTA (output) DOUBLE PRECISION array, dimension ( 2 ) Contains (D(j) - lambda_I) in its j-th component. The vector DELTA contains the information necessary to construct the eigenvectors. - RHO (input) REAL + RHO (input) DOUBLE PRECISION The scalar in the symmetric updating formula. - DSIGMA (output) REAL + DSIGMA (output) DOUBLE PRECISION The computed lambda_I, the I-th updated eigenvalue. - WORK (workspace) REAL array, dimension ( 2 ) + WORK (workspace) DOUBLE PRECISION array, dimension ( 2 ) WORK contains (D(j) + sigma_I) in its j-th component. Further Details @@ -86896,9 +22175,9 @@ L240: del = d__[2] - d__[1]; delsq = del * (d__[2] + d__[1]); if (*i__ == 1) { - w = *rho * 4.f * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.f) - z__[1] * - z__[1] / (d__[1] * 3.f + d__[2])) / del + 1.f; - if (w > 0.f) { + w = *rho * 4. * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.) - z__[1] * + z__[1] / (d__[1] * 3. + d__[2])) / del + 1.; + if (w > 0.) { b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); c__ = *rho * z__[1] * z__[1] * delsq; @@ -86908,8 +22187,7 @@ L240: The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) */ - tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1)) - )); + tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); /* The following TAU is DSIGMA - D( 1 ) */ @@ -86917,7 +22195,7 @@ L240: *dsigma = d__[1] + tau; delta[1] = -tau; delta[2] = del - tau; - work[1] = d__[1] * 2.f + tau; + work[1] = d__[1] * 2. 
+ tau; work[2] = d__[1] + tau + d__[2]; /* DELTA( 1 ) = -Z( 1 ) / TAU @@ -86929,20 +22207,20 @@ L240: /* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ - if (b > 0.f) { - tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f)); + if (b > 0.) { + tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); } else { - tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f; + tau = (b - sqrt(b * b + c__ * 4.)) / 2.; } /* The following TAU is DSIGMA - D( 2 ) */ - tau /= d__[2] + sqrt((r__1 = d__[2] * d__[2] + tau, dabs(r__1))); + tau /= d__[2] + sqrt((d__1 = d__[2] * d__[2] + tau, abs(d__1))); *dsigma = d__[2] + tau; delta[1] = -(del + tau); delta[2] = -tau; work[1] = d__[1] + tau + d__[2]; - work[2] = d__[2] * 2.f + tau; + work[2] = d__[2] * 2. + tau; /* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) DELTA( 2 ) = -Z( 2 ) / TAU @@ -86962,10 +22240,10 @@ L240: /* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ - if (b > 0.f) { - tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f; + if (b > 0.) { + tau = (b + sqrt(b * b + c__ * 4.)) / 2.; } else { - tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f)); + tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); } /* The following TAU is DSIGMA - D( 2 ) */ @@ -86975,7 +22253,7 @@ L240: delta[1] = -(del + tau); delta[2] = -tau; work[1] = d__[1] + tau + d__[2]; - work[2] = d__[2] * 2.f + tau; + work[2] = d__[2] * 2. 
+ tau; /* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) DELTA( 2 ) = -Z( 2 ) / TAU @@ -86986,38 +22264,41 @@ L240: } return 0; -/* End of SLASD5 */ +/* End of DLASD5 */ -} /* slasd5_ */ +} /* dlasd5_ */ -/* Subroutine */ int slasd6_(integer *icompq, integer *nl, integer *nr, - integer *sqre, real *d__, real *vf, real *vl, real *alpha, real *beta, - integer *idxq, integer *perm, integer *givptr, integer *givcol, - integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * - difl, real *difr, real *z__, integer *k, real *c__, real *s, real * - work, integer *iwork, integer *info) +/* Subroutine */ int dlasd6_(integer *icompq, integer *nl, integer *nr, + integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, + doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * + difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, + doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, i__1; - real r__1, r__2; + doublereal d__1, d__2; /* Local variables */ static integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *), slasd7_(integer *, integer *, integer *, integer *, - integer *, real *, real *, real *, real *, real *, real *, real *, - real *, real *, real *, integer *, integer *, integer *, integer - *, integer *, integer *, integer *, real *, integer *, real *, - real *, integer *), slasd8_(integer *, integer *, real *, real *, - real *, real *, real *, real *, integer *, real *, real *, - integer *); + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlasd7_(integer *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, 
doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), dlasd8_( + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlamrg_(integer *, integer *, + doublereal *, integer *, integer *, integer *); static integer isigma; - extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( - char *, integer *, integer *, real *, real *, integer *, integer * - , real *, integer *, integer *), slamrg_(integer *, - integer *, real *, integer *, integer *, integer *); - static real orgnrm; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal orgnrm; /* @@ -87030,15 +22311,15 @@ L240: Purpose ======= - SLASD6 computes the SVD of an updated upper bidiagonal matrix B + DLASD6 computes the SVD of an updated upper bidiagonal matrix B obtained by merging two smaller ones by appending a row. This routine is used only for the problem which requires all singular values and optionally singular vector matrices in factored form. B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. - A related subroutine, SLASD1, handles the case in which all singular + A related subroutine, DLASD1, handles the case in which all singular values and singular vectors of the bidiagonal matrix are desired. - SLASD6 computes the SVD as follows: + DLASD6 computes the SVD as follows: ( D1(in) 0 0 0 ) B = U(in) * ( Z1' a Z2' b ) * VT(in) @@ -87054,7 +22335,7 @@ L240: components of all the right singular vectors of the lower block, and the last components of all the right singular vectors of the upper block. 
These components are stored and updated in VF and VL, - respectively, in SLASD6. Hence U and VT are not explicitly + respectively, in DLASD6. Hence U and VT are not explicitly referenced. The singular values are stored in D. The algorithm consists of two @@ -87064,16 +22345,16 @@ L240: when there are multiple singular values or if there is a zero in the Z vector. For each such occurence the dimension of the secular equation problem is reduced by one. This stage is - performed by the routine SLASD7. + performed by the routine DLASD7. The second stage consists of calculating the updated singular values. This is done by finding the roots of the - secular equation via the routine SLASD4 (as called by SLASD8). + secular equation via the routine DLASD4 (as called by DLASD8). This routine also updates VF and VL and computes the distances between the updated singular values and the old singular values. - SLASD6 is called from SLASDA. + DLASD6 is called from DLASDA. Arguments ========= @@ -87097,30 +22378,30 @@ L240: The bidiagonal matrix has row dimension N = NL + NR + 1, and column dimension M = N + SQRE. - D (input/output) REAL array, dimension ( NL+NR+1 ). + D (input/output) DOUBLE PRECISION array, dimension ( NL+NR+1 ). On entry D(1:NL,1:NL) contains the singular values of the upper block, and D(NL+2:N) contains the singular values of the lower block. On exit D(1:N) contains the singular values of the modified matrix. - VF (input/output) REAL array, dimension ( M ) + VF (input/output) DOUBLE PRECISION array, dimension ( M ) On entry, VF(1:NL+1) contains the first components of all right singular vectors of the upper block; and VF(NL+2:M) contains the first components of all right singular vectors of the lower block. On exit, VF contains the first components of all right singular vectors of the bidiagonal matrix. 
- VL (input/output) REAL array, dimension ( M ) + VL (input/output) DOUBLE PRECISION array, dimension ( M ) On entry, VL(1:NL+1) contains the last components of all right singular vectors of the upper block; and VL(NL+2:M) contains the last components of all right singular vectors of the lower block. On exit, VL contains the last components of all right singular vectors of the bidiagonal matrix. - ALPHA (input) REAL + ALPHA (input) DOUBLE PRECISION Contains the diagonal element associated with the added row. - BETA (input) REAL + BETA (input) DOUBLE PRECISION Contains the off-diagonal element associated with the added row. @@ -87144,25 +22425,25 @@ L240: LDGCOL (input) INTEGER leading dimension of GIVCOL, must be at least N. - GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) + GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) Each number indicates the C or S value to be used in the corresponding Givens rotation. Not referenced if ICOMPQ = 0. LDGNUM (input) INTEGER The leading dimension of GIVNUM and POLES, must be at least N. - POLES (output) REAL array, dimension ( LDGNUM, 2 ) + POLES (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) On exit, POLES(1,*) is an array containing the new singular values obtained from solving the secular equation, and POLES(2,*) is an array containing the poles in the secular equation. Not referenced if ICOMPQ = 0. - DIFL (output) REAL array, dimension ( N ) + DIFL (output) DOUBLE PRECISION array, dimension ( N ) On exit, DIFL(I) is the distance between I-th updated (undeflated) singular value and the I-th (undeflated) old singular value. - DIFR (output) REAL array, + DIFR (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and dimension ( N ) if ICOMPQ = 0. On exit, DIFR(I, 1) is the distance between I-th updated @@ -87172,9 +22453,9 @@ L240: If ICOMPQ = 1, DIFR(1:K,2) is an array containing the normalizing factors for the right singular vector matrix. 
- See SLASD8 for details on DIFL and DIFR. + See DLASD8 for details on DIFL and DIFR. - Z (output) REAL array, dimension ( M ) + Z (output) DOUBLE PRECISION array, dimension ( M ) The first elements of this array contain the components of the deflation-adjusted updating row vector. @@ -87182,15 +22463,15 @@ L240: Contains the dimension of the non-deflated matrix, This is the order of the related secular equation. 1 <= K <=N. - C (output) REAL + C (output) DOUBLE PRECISION C contains garbage if SQRE =0 and the C-value of a Givens rotation related to the right null space if SQRE = 1. - S (output) REAL + S (output) DOUBLE PRECISION S contains garbage if SQRE =0 and the S-value of a Givens rotation related to the right null space if SQRE = 1. - WORK (workspace) REAL array, dimension ( 4 * M ) + WORK (workspace) DOUBLE PRECISION array, dimension ( 4 * M ) IWORK (workspace) INTEGER array, dimension ( 3 * N ) @@ -87238,13 +22519,13 @@ L240: n = *nl + *nr + 1; m = n + *sqre; - if ((*icompq < 0) || (*icompq > 1)) { + if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*nl < 1) { *info = -2; } else if (*nr < 1) { *info = -3; - } else if ((*sqre < 0) || (*sqre > 1)) { + } else if (*sqre < 0 || *sqre > 1) { *info = -4; } else if (*ldgcol < n) { *info = -14; @@ -87253,14 +22534,14 @@ L240: } if (*info != 0) { i__1 = -(*info); - xerbla_("SLASD6", &i__1); + xerbla_("DLASD6", &i__1); return 0; } /* The following values are for bookkeeping purposes only. They are integer pointers which indicate the portion of the workspace - used by a particular array in SLASD7 and SLASD8. + used by a particular array in DLASD7 and DLASD8. 
*/ isigma = 1; @@ -87277,24 +22558,23 @@ L240: Computing MAX */ - r__1 = dabs(*alpha), r__2 = dabs(*beta); - orgnrm = dmax(r__1,r__2); - d__[*nl + 1] = 0.f; + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1,d__2); + d__[*nl + 1] = 0.; i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { - if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) { - orgnrm = (r__1 = d__[i__], dabs(r__1)); + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); } /* L10: */ } - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &n, &c__1, &d__[1], &n, - info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &n, &c__1, &d__[1], &n, info); *alpha /= orgnrm; *beta /= orgnrm; /* Sort and Deflate singular values. */ - slasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & + dlasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], & iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[ givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, @@ -87302,60 +22582,60 @@ L240: /* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */ - slasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], + dlasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], ldgnum, &work[isigma], &work[iw], info); /* Save the poles if ICOMPQ = 1. */ if (*icompq == 1) { - scopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); - scopy_(k, &work[isigma], &c__1, &poles[((poles_dim1) << (1)) + 1], & - c__1); + dcopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); + dcopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1); } /* Unscale. */ - slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, &n, &c__1, &d__[1], &n, - info); + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &n, &c__1, &d__[1], &n, info); /* Prepare the IDXQ sorting permutation. 
*/ n1 = *k; n2 = n - *k; - slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); return 0; -/* End of SLASD6 */ +/* End of DLASD6 */ -} /* slasd6_ */ +} /* dlasd6_ */ -/* Subroutine */ int slasd7_(integer *icompq, integer *nl, integer *nr, - integer *sqre, integer *k, real *d__, real *z__, real *zw, real *vf, - real *vfw, real *vl, real *vlw, real *alpha, real *beta, real *dsigma, - integer *idx, integer *idxp, integer *idxq, integer *perm, integer * - givptr, integer *givcol, integer *ldgcol, real *givnum, integer * - ldgnum, real *c__, real *s, integer *info) +/* Subroutine */ int dlasd7_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *k, doublereal *d__, doublereal *z__, + doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, + doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * + dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *c__, doublereal *s, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1; - real r__1, r__2; + doublereal d__1, d__2; /* Local variables */ static integer i__, j, m, n, k2; - static real z1; + static doublereal z1; static integer jp; - static real eps, tau, tol; + static doublereal eps, tau, tol; static integer nlp1, nlp2, idxi, idxj; - extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, - integer *, real *, real *); - static integer idxjp, jprev; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - extern doublereal slapy2_(real *, real *), slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( - integer *, integer *, real *, integer *, integer *, integer *); - static real hlftol; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, 
integer *, doublereal *, doublereal *); + static integer idxjp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer jprev; + + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *); + static doublereal hlftol; /* @@ -87368,14 +22648,14 @@ L240: Purpose ======= - SLASD7 merges the two sets of singular values together into a single + DLASD7 merges the two sets of singular values together into a single sorted set. Then it tries to deflate the size of the problem. There are two ways in which deflation can occur: when two or more singular values are close together or if there is a tiny entry in the Z vector. For each such occurrence the order of the related secular equation problem is reduced by one. - SLASD7 is called from SLASD6. + DLASD7 is called from DLASD6. Arguments ========= @@ -87405,47 +22685,47 @@ L240: Contains the dimension of the non-deflated matrix, this is the order of the related secular equation. 1 <= K <=N. - D (input/output) REAL array, dimension ( N ) + D (input/output) DOUBLE PRECISION array, dimension ( N ) On entry D contains the singular values of the two submatrices to be combined. On exit D contains the trailing (N-K) updated singular values (those which were deflated) sorted into increasing order. - Z (output) REAL array, dimension ( M ) + Z (output) DOUBLE PRECISION array, dimension ( M ) On exit Z contains the updating row vector in the secular equation. - ZW (workspace) REAL array, dimension ( M ) + ZW (workspace) DOUBLE PRECISION array, dimension ( M ) Workspace for Z. - VF (input/output) REAL array, dimension ( M ) + VF (input/output) DOUBLE PRECISION array, dimension ( M ) On entry, VF(1:NL+1) contains the first components of all right singular vectors of the upper block; and VF(NL+2:M) contains the first components of all right singular vectors of the lower block. 
On exit, VF contains the first components of all right singular vectors of the bidiagonal matrix. - VFW (workspace) REAL array, dimension ( M ) + VFW (workspace) DOUBLE PRECISION array, dimension ( M ) Workspace for VF. - VL (input/output) REAL array, dimension ( M ) + VL (input/output) DOUBLE PRECISION array, dimension ( M ) On entry, VL(1:NL+1) contains the last components of all right singular vectors of the upper block; and VL(NL+2:M) contains the last components of all right singular vectors of the lower block. On exit, VL contains the last components of all right singular vectors of the bidiagonal matrix. - VLW (workspace) REAL array, dimension ( M ) + VLW (workspace) DOUBLE PRECISION array, dimension ( M ) Workspace for VL. - ALPHA (input) REAL + ALPHA (input) DOUBLE PRECISION Contains the diagonal element associated with the added row. - BETA (input) REAL + BETA (input) DOUBLE PRECISION Contains the off-diagonal element associated with the added row. - DSIGMA (output) REAL array, dimension ( N ) + DSIGMA (output) DOUBLE PRECISION array, dimension ( N ) Contains a copy of the diagonal elements (K-1 singular values and one zero) in the secular equation. @@ -87481,18 +22761,18 @@ L240: LDGCOL (input) INTEGER The leading dimension of GIVCOL, must be at least N. - GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) + GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) Each number indicates the C or S value to be used in the corresponding Givens rotation. Not referenced if ICOMPQ = 0. LDGNUM (input) INTEGER The leading dimension of GIVNUM, must be at least N. - C (output) REAL + C (output) DOUBLE PRECISION C contains garbage if SQRE =0 and the C-value of a Givens rotation related to the right null space if SQRE = 1. - S (output) REAL + S (output) DOUBLE PRECISION S contains garbage if SQRE =0 and the S-value of a Givens rotation related to the right null space if SQRE = 1. 
@@ -87538,13 +22818,13 @@ L240: n = *nl + *nr + 1; m = n + *sqre; - if ((*icompq < 0) || (*icompq > 1)) { + if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*nl < 1) { *info = -2; } else if (*nr < 1) { *info = -3; - } else if ((*sqre < 0) || (*sqre > 1)) { + } else if (*sqre < 0 || *sqre > 1) { *info = -4; } else if (*ldgcol < n) { *info = -22; @@ -87553,7 +22833,7 @@ L240: } if (*info != 0) { i__1 = -(*info); - xerbla_("SLASD7", &i__1); + xerbla_("DLASD7", &i__1); return 0; } @@ -87569,11 +22849,11 @@ L240: */ z1 = *alpha * vl[nlp1]; - vl[nlp1] = 0.f; + vl[nlp1] = 0.; tau = vf[nlp1]; for (i__ = *nl; i__ >= 1; --i__) { z__[i__ + 1] = *alpha * vl[i__]; - vl[i__] = 0.f; + vl[i__] = 0.; vf[i__ + 1] = vf[i__]; d__[i__ + 1] = d__[i__]; idxq[i__ + 1] = idxq[i__] + 1; @@ -87586,7 +22866,7 @@ L240: i__1 = m; for (i__ = nlp2; i__ <= i__1; ++i__) { z__[i__] = *beta * vf[i__]; - vf[i__] = 0.f; + vf[i__] = 0.; /* L20: */ } @@ -87609,7 +22889,7 @@ L240: /* L40: */ } - slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { @@ -87623,13 +22903,13 @@ L240: /* Calculate the allowable deflation tolerence */ - eps = slamch_("Epsilon"); + eps = EPSILON; /* Computing MAX */ - r__1 = dabs(*alpha), r__2 = dabs(*beta); - tol = dmax(r__1,r__2); + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1,d__2); /* Computing MAX */ - r__2 = (r__1 = d__[n], dabs(r__1)); - tol = eps * 64.f * dmax(r__2,tol); + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 64. * max(d__2,tol); /* There are 2 kinds of deflation -- first a value in the z-vector @@ -87656,7 +22936,7 @@ L240: k2 = n + 1; i__1 = n; for (j = 2; j <= i__1; ++j) { - if ((r__1 = z__[j], dabs(r__1)) <= tol) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { /* Deflate due to small z component. 
*/ @@ -87678,7 +22958,7 @@ L80: if (j > n) { goto L90; } - if ((r__1 = z__[j], dabs(r__1)) <= tol) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { /* Deflate due to small z component. */ @@ -87688,7 +22968,7 @@ L80: /* Check if singular values are close enough to allow deflation. */ - if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) { + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { /* Deflation is possible. */ @@ -87700,9 +22980,9 @@ L80: destructive underflow. */ - tau = slapy2_(c__, s); + tau = dlapy2_(c__, s); z__[j] = tau; - z__[jprev] = 0.f; + z__[jprev] = 0.; *c__ /= tau; *s = -(*s) / tau; @@ -87718,13 +22998,13 @@ L80: if (idxj <= nlp1) { --idxj; } - givcol[*givptr + ((givcol_dim1) << (1))] = idxjp; + givcol[*givptr + (givcol_dim1 << 1)] = idxjp; givcol[*givptr + givcol_dim1] = idxj; - givnum[*givptr + ((givnum_dim1) << (1))] = *c__; + givnum[*givptr + (givnum_dim1 << 1)] = *c__; givnum[*givptr + givnum_dim1] = *s; } - srot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); - srot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); + drot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); + drot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); --k2; idxp[k2] = jprev; jprev = j; @@ -87780,32 +23060,32 @@ L100: */ i__1 = n - *k; - scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); /* Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and VL(M). 
*/ - dsigma[1] = 0.f; - hlftol = tol / 2.f; - if (dabs(dsigma[2]) <= hlftol) { + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { dsigma[2] = hlftol; } if (m > n) { - z__[1] = slapy2_(&z1, &z__[m]); + z__[1] = dlapy2_(&z1, &z__[m]); if (z__[1] <= tol) { - *c__ = 1.f; - *s = 0.f; + *c__ = 1.; + *s = 0.; z__[1] = tol; } else { *c__ = z1 / z__[1]; *s = -z__[m] / z__[1]; } - srot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); - srot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); + drot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); + drot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); } else { - if (dabs(z1) <= tol) { + if (abs(z1) <= tol) { z__[1] = tol; } else { z__[1] = z1; @@ -87815,48 +23095,51 @@ L100: /* Restore Z, VF, and VL. */ i__1 = *k - 1; - scopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); + dcopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); i__1 = n - 1; - scopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); + dcopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); i__1 = n - 1; - scopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); + dcopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); return 0; -/* End of SLASD7 */ +/* End of DLASD7 */ -} /* slasd7_ */ +} /* dlasd7_ */ -/* Subroutine */ int slasd8_(integer *icompq, integer *k, real *d__, real * - z__, real *vf, real *vl, real *difl, real *difr, integer *lddifr, - real *dsigma, real *work, integer *info) +/* Subroutine */ int dlasd8_(integer *icompq, integer *k, doublereal *d__, + doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, + doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * + work, integer *info) { /* System generated locals */ integer difr_dim1, difr_offset, i__1, i__2; - real r__1, r__2; + doublereal d__1, d__2; /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); + double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ static integer i__, j; - static real dj, rho; + static doublereal dj, rho; static integer iwk1, iwk2, iwk3; - 
static real temp; - extern doublereal sdot_(integer *, real *, integer *, real *, integer *); - static integer iwk2i, iwk3i; - extern doublereal snrm2_(integer *, real *, integer *); - static real diflj, difrj, dsigj; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - extern doublereal slamc3_(real *, real *); - extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, - real *, real *, real *, real *, integer *), xerbla_(char *, + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); - static real dsigjp; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, - real *, integer *); + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + static integer iwk2i, iwk3i; + static doublereal diflj, difrj, dsigj; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *); + static doublereal dsigjp; /* @@ -87869,14 +23152,14 @@ L100: Purpose ======= - SLASD8 finds the square roots of the roots of the secular equation, + DLASD8 finds the square roots of the roots of the secular equation, as defined by the values in DSIGMA and Z. 
It makes the appropriate - calls to SLASD4, and stores, for each element in D, the distance + calls to DLASD4, and stores, for each element in D, the distance to its two nearest poles (elements in DSIGMA). It also updates the arrays VF and VL, the first and last components of all the right singular vectors of the original bidiagonal matrix. - SLASD8 is called from SLASD6. + DLASD8 is called from DLASD6. Arguments ========= @@ -87889,31 +23172,31 @@ L100: K (input) INTEGER The number of terms in the rational function to be solved - by SLASD4. K >= 1. + by DLASD4. K >= 1. - D (output) REAL array, dimension ( K ) + D (output) DOUBLE PRECISION array, dimension ( K ) On output, D contains the updated singular values. - Z (input) REAL array, dimension ( K ) + Z (input) DOUBLE PRECISION array, dimension ( K ) The first K elements of this array contain the components of the deflation-adjusted updating row vector. - VF (input/output) REAL array, dimension ( K ) + VF (input/output) DOUBLE PRECISION array, dimension ( K ) On entry, VF contains information passed through DBEDE8. On exit, VF contains the first K components of the first components of all right singular vectors of the bidiagonal matrix. - VL (input/output) REAL array, dimension ( K ) + VL (input/output) DOUBLE PRECISION array, dimension ( K ) On entry, VL contains information passed through DBEDE8. On exit, VL contains the first K components of the last components of all right singular vectors of the bidiagonal matrix. - DIFL (output) REAL array, dimension ( K ) + DIFL (output) DOUBLE PRECISION array, dimension ( K ) On exit, DIFL(I) = D(I) - DSIGMA(I). - DIFR (output) REAL array, + DIFR (output) DOUBLE PRECISION array, dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and dimension ( K ) if ICOMPQ = 0. On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not @@ -87925,12 +23208,12 @@ L100: LDDIFR (input) INTEGER The leading dimension of DIFR, must be at least K. 
- DSIGMA (input) REAL array, dimension ( K ) + DSIGMA (input) DOUBLE PRECISION array, dimension ( K ) The first K elements of this array contain the old roots of the deflated updating problem. These are the poles of the secular equation. - WORK (workspace) REAL array, dimension at least 3 * K + WORK (workspace) DOUBLE PRECISION array, dimension at least 3 * K INFO (output) INTEGER = 0: successful exit. @@ -87965,7 +23248,7 @@ L100: /* Function Body */ *info = 0; - if ((*icompq < 0) || (*icompq > 1)) { + if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*k < 1) { *info = -2; @@ -87974,18 +23257,18 @@ L100: } if (*info != 0) { i__1 = -(*info); - xerbla_("SLASD8", &i__1); + xerbla_("DLASD8", &i__1); return 0; } /* Quick return if possible */ if (*k == 1) { - d__[1] = dabs(z__[1]); + d__[1] = abs(z__[1]); difl[1] = d__[1]; if (*icompq == 1) { - difl[2] = 1.f; - difr[((difr_dim1) << (1)) + 1] = 1.f; + difl[2] = 1.; + difr[(difr_dim1 << 1) + 1] = 1.; } return 0; } @@ -88011,7 +23294,7 @@ L100: i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { - dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; + dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; /* L10: */ } @@ -88025,13 +23308,13 @@ L100: /* Normalize Z. */ - rho = snrm2_(k, &z__[1], &c__1); - slascl_("G", &c__0, &c__0, &rho, &c_b1011, k, &c__1, &z__[1], k, info); + rho = dnrm2_(k, &z__[1], &c__1); + dlascl_("G", &c__0, &c__0, &rho, &c_b15, k, &c__1, &z__[1], k, info); rho *= rho; /* Initialize WORK(IWK3). */ - slaset_("A", k, &c__1, &c_b1011, &c_b1011, &work[iwk3], k); + dlaset_("A", k, &c__1, &c_b15, &c_b15, &work[iwk3], k); /* Compute the updated singular values, the arrays DIFL, DIFR, @@ -88040,7 +23323,7 @@ L100: i__1 = *k; for (j = 1; j <= i__1; ++j) { - slasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ + dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ iwk2], info); /* If the root finder fails, the computation is terminated. 
*/ @@ -88072,8 +23355,8 @@ L100: i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { - r__2 = sqrt((r__1 = work[iwk3i + i__], dabs(r__1))); - z__[i__] = r_sign(&r__2, &z__[i__]); + d__2 = sqrt((d__1 = work[iwk3i + i__], abs(d__1))); + z__[i__] = d_sign(&d__2, &z__[i__]); /* L50: */ } @@ -88091,39 +23374,40 @@ L100: work[j] = -z__[j] / diflj / (dsigma[j] + dj); i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigj) - diflj) / ( + work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigj) - diflj) / ( dsigma[i__] + dj); /* L60: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { - work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigjp) + difrj) / + work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigjp) + difrj) / (dsigma[i__] + dj); /* L70: */ } - temp = snrm2_(k, &work[1], &c__1); - work[iwk2i + j] = sdot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; - work[iwk3i + j] = sdot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; + temp = dnrm2_(k, &work[1], &c__1); + work[iwk2i + j] = ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; + work[iwk3i + j] = ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; if (*icompq == 1) { - difr[j + ((difr_dim1) << (1))] = temp; + difr[j + (difr_dim1 << 1)] = temp; } /* L80: */ } - scopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); - scopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); + dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); + dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); return 0; -/* End of SLASD8 */ +/* End of DLASD8 */ -} /* slasd8_ */ +} /* dlasd8_ */ -/* Subroutine */ int slasda_(integer *icompq, integer *smlsiz, integer *n, - integer *sqre, real *d__, real *e, real *u, integer *ldu, real *vt, - integer *k, real *difl, real *difr, real *z__, real *poles, integer * - givptr, integer *givcol, integer *ldgcol, integer *perm, real *givnum, - real *c__, real *s, real *work, integer *iwork, integer *info) +/* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n, + integer *sqre, doublereal 
*d__, doublereal *e, doublereal *u, integer + *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, + doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, + integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, + doublereal *s, doublereal *work, integer *iwork, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, @@ -88137,24 +23421,27 @@ L100: /* Local variables */ static integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; - static real beta; + static doublereal beta; static integer idxq, nlvl; - static real alpha; - static integer inode, ndiml, ndimr, idxqi, itemp, sqrei; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *), slasd6_(integer *, integer *, integer *, integer *, - real *, real *, real *, real *, real *, integer *, integer *, - integer *, integer *, integer *, real *, integer *, real *, real * - , real *, real *, integer *, real *, real *, real *, integer *, - integer *); + static doublereal alpha; + static integer inode, ndiml, ndimr, idxqi, itemp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer sqrei; + extern /* Subroutine */ int dlasd6_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *); static integer nwork1, nwork2; - extern /* Subroutine */ int xerbla_(char *, integer *), slasdq_( - char *, integer *, integer *, integer *, integer *, integer *, - real *, real *, real *, integer *, real *, integer *, real *, - integer *, real *, integer *), slasdt_(integer *, integer - *, integer 
*, integer *, integer *, integer *, integer *), - slaset_(char *, integer *, integer *, real *, real *, real *, - integer *); + extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlasdt_(integer *, integer *, + integer *, integer *, integer *, integer *, integer *), dlaset_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *); static integer smlszp; @@ -88168,14 +23455,14 @@ L100: Purpose ======= - Using a divide and conquer approach, SLASDA computes the singular + Using a divide and conquer approach, DLASDA computes the singular value decomposition (SVD) of a real upper bidiagonal N-by-M matrix B with diagonal D and offdiagonal E, where M = N + SQRE. The algorithm computes the singular values in the SVD B = U * S * VT. The orthogonal matrices U and VT are optionally computed in compact form. - A related subroutine, SLASD0, computes the singular values and + A related subroutine, DLASD0, computes the singular values and the singular vectors in explicit form. Arguments @@ -88201,15 +23488,15 @@ L100: = 0: The bidiagonal matrix has column dimension M = N; = 1: The bidiagonal matrix has column dimension M = N + 1. - D (input/output) REAL array, dimension ( N ) + D (input/output) DOUBLE PRECISION array, dimension ( N ) On entry D contains the main diagonal of the bidiagonal matrix. On exit D, if INFO = 0, contains its singular values. - E (input) REAL array, dimension ( M-1 ) + E (input) DOUBLE PRECISION array, dimension ( M-1 ) Contains the subdiagonal entries of the bidiagonal matrix. On exit, E has been destroyed. - U (output) REAL array, + U (output) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. 
If ICOMPQ = 1, on exit, U contains the left singular vector matrices of all subproblems at the bottom @@ -88219,7 +23506,7 @@ L100: The leading dimension of arrays U, VT, DIFL, DIFR, POLES, GIVNUM, and Z. - VT (output) REAL array, + VT (output) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right singular vector matrices of all subproblems at the bottom @@ -88230,26 +23517,26 @@ L100: If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th secular equation on the computation tree. - DIFL (output) REAL array, dimension ( LDU, NLVL ), + DIFL (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), where NLVL = floor(log_2 (N/SMLSIZ))). - DIFR (output) REAL array, + DIFR (output) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and dimension ( N ) if ICOMPQ = 0. If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) record distances between singular values on the I-th level and singular values on the (I -1)-th level, and DIFR(1:N, 2 * I ) contains the normalizing factors for - the right singular vector matrix. See SLASD8 for details. + the right singular vector matrix. See DLASD8 for details. - Z (output) REAL array, + Z (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ) if ICOMPQ = 1 and dimension ( N ) if ICOMPQ = 0. The first K elements of Z(1, I) contain the components of the deflation-adjusted updating row vector for subproblems on the I-th level. - POLES (output) REAL array, + POLES (output) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and POLES(1, 2*I) contain the new and old singular values @@ -88276,26 +23563,26 @@ L100: if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records permutations done on the I-th level of the computation tree. 
- GIVNUM (output) REAL array, + GIVNUM (output) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- values of Givens rotations performed on the I-th level on the computation tree. - C (output) REAL array, + C (output) DOUBLE PRECISION array, dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 and the I-th subproblem is not square, on exit, C( I ) contains the C-value of a Givens rotation related to the right null space of the I-th subproblem. - S (output) REAL array, dimension ( N ) if + S (output) DOUBLE PRECISION array, dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 and the I-th subproblem is not square, on exit, S( I ) contains the S-value of a Givens rotation related to the right null space of the I-th subproblem. - WORK (workspace) REAL array, dimension + WORK (workspace) DOUBLE PRECISION array, dimension (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). IWORK (workspace) INTEGER array. @@ -88359,13 +23646,13 @@ L100: /* Function Body */ *info = 0; - if ((*icompq < 0) || (*icompq > 1)) { + if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; } else if (*n < 0) { *info = -3; - } else if ((*sqre < 0) || (*sqre > 1)) { + } else if (*sqre < 0 || *sqre > 1) { *info = -4; } else if (*ldu < *n + *sqre) { *info = -8; @@ -88374,21 +23661,21 @@ L100: } if (*info != 0) { i__1 = -(*info); - xerbla_("SLASDA", &i__1); + xerbla_("DLASDA", &i__1); return 0; } m = *n + *sqre; -/* If the input matrix is too small, call SLASDQ to find the SVD. */ +/* If the input matrix is too small, call DLASDQ to find the SVD. 
*/ if (*n <= *smlsiz) { if (*icompq == 0) { - slasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & work[1], info); } else { - slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); } @@ -88412,12 +23699,12 @@ L100: nwork1 = vl + m; nwork2 = nwork1 + smlszp * smlszp; - slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* for the nodes on bottom level of the tree, solve - their subproblems by SLASDQ. + their subproblems by DLASDQ. */ ndb1 = (nd + 1) / 2; @@ -88444,23 +23731,22 @@ L100: vli = vl + nlf - 1; sqrei = 1; if (*icompq == 0) { - slaset_("A", &nlp1, &nlp1, &c_b320, &c_b1011, &work[nwork1], & - smlszp); - slasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & + dlaset_("A", &nlp1, &nlp1, &c_b29, &c_b15, &work[nwork1], &smlszp); + dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], &nl, &work[nwork2], info); itemp = nwork1 + nl * smlszp; - scopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); - scopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); + dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); + dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); } else { - slaset_("A", &nl, &nl, &c_b320, &c_b1011, &u[nlf + u_dim1], ldu); - slaset_("A", &nlp1, &nlp1, &c_b320, &c_b1011, &vt[nlf + vt_dim1], + dlaset_("A", &nl, &nl, &c_b29, &c_b15, &u[nlf + u_dim1], ldu); + dlaset_("A", &nlp1, &nlp1, &c_b29, &c_b15, &vt[nlf + vt_dim1], ldu); - slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & + dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & vt[nlf + vt_dim1], ldu, &u[nlf + 
u_dim1], ldu, &u[nlf + u_dim1], ldu, &work[nwork1], info); - scopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); - scopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) + dcopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); + dcopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) ; } if (*info != 0) { @@ -88481,23 +23767,22 @@ L100: vli += nlp1; nrp1 = nr + sqrei; if (*icompq == 0) { - slaset_("A", &nrp1, &nrp1, &c_b320, &c_b1011, &work[nwork1], & - smlszp); - slasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & + dlaset_("A", &nrp1, &nrp1, &c_b29, &c_b15, &work[nwork1], &smlszp); + dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], &nr, &work[nwork2], info); itemp = nwork1 + (nrp1 - 1) * smlszp; - scopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); - scopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); + dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); + dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); } else { - slaset_("A", &nr, &nr, &c_b320, &c_b1011, &u[nrf + u_dim1], ldu); - slaset_("A", &nrp1, &nrp1, &c_b320, &c_b1011, &vt[nrf + vt_dim1], + dlaset_("A", &nr, &nr, &c_b29, &c_b15, &u[nrf + u_dim1], ldu); + dlaset_("A", &nrp1, &nrp1, &c_b29, &c_b15, &vt[nrf + vt_dim1], ldu); - slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & + dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf + u_dim1], ldu, &work[nwork1], info); - scopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); - scopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) + dcopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); + dcopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) ; } if (*info != 0) { @@ -88515,7 +23800,7 @@ L100: j = pow_ii(&c__2, &nlvl); for (lvl = nlvl; lvl >= 1; --lvl) { - lvl2 = ((lvl) << (1)) - 1; + lvl2 = (lvl << 
1) - 1; /* Find the first node LF and last node LL on @@ -88528,7 +23813,7 @@ L100: } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); - ll = ((lf) << (1)) - 1; + ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { @@ -88549,7 +23834,7 @@ L100: alpha = d__[ic]; beta = e[ic]; if (*icompq == 0) { - slasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & work[vli], &alpha, &beta, &iwork[idxqi], &perm[ perm_offset], &givptr[1], &givcol[givcol_offset], ldgcol, &givnum[givnum_offset], ldu, &poles[ @@ -88558,7 +23843,7 @@ L100: &iwork[iwk], info); } else { --j; - slasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf + lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, &givnum[nlf + lvl2 * @@ -88577,14 +23862,14 @@ L100: return 0; -/* End of SLASDA */ +/* End of DLASDA */ -} /* slasda_ */ +} /* dlasda_ */ -/* Subroutine */ int slasdq_(char *uplo, integer *sqre, integer *n, integer * - ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, - integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real * - work, integer *info) +/* Subroutine */ int dlasdq_(char *uplo, integer *sqre, integer *n, integer * + ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, + doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, + doublereal *c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, @@ -88592,21 +23877,22 @@ L100: /* Local variables */ static integer i__, j; - static real r__, cs, sn; + static doublereal r__, cs, sn; static integer np1, isub; - static real smin; + static doublereal smin; static integer sqre1; extern logical lsame_(char *, char *); - extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, - integer 
*, real *, real *, real *, integer *); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer * + , doublereal *, integer *); static integer iuplo; - extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, - integer *), xerbla_(char *, integer *), slartg_(real *, - real *, real *, real *, real *); + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), xerbla_(char *, + integer *), dbdsqr_(char *, integer *, integer *, integer + *, integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); static logical rotate; - extern /* Subroutine */ int sbdsqr_(char *, integer *, integer *, integer - *, integer *, real *, real *, real *, integer *, real *, integer * - , real *, integer *, real *, integer *); /* @@ -88619,7 +23905,7 @@ L100: Purpose ======= - SLASDQ computes the singular value decomposition (SVD) of a real + DLASDQ computes the singular value decomposition (SVD) of a real (upper or lower) bidiagonal matrix with diagonal D and offdiagonal E, accumulating the transformations if desired. Letting B denote the input bidiagonal matrix, the algorithm computes orthogonal @@ -88669,12 +23955,12 @@ L100: On entry, NCC specifies the number of columns of the matrix C. NCC must be at least 0. - D (input/output) REAL array, dimension (N) + D (input/output) DOUBLE PRECISION array, dimension (N) On entry, D contains the diagonal entries of the bidiagonal matrix whose SVD is desired. On normal exit, D contains the singular values in ascending order. - E (input/output) REAL array. + E (input/output) DOUBLE PRECISION array. dimension is (N-1) if SQRE = 0 and N if SQRE = 1. On entry, the entries of E contain the offdiagonal entries of the bidiagonal matrix whose SVD is desired. 
On normal @@ -88683,7 +23969,7 @@ L100: of a bidiagonal matrix orthogonally equivalent to the one given as input. - VT (input/output) REAL array, dimension (LDVT, NCVT) + VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) On entry, contains a matrix which on exit has been premultiplied by P', dimension N-by-NCVT if SQRE = 0 and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). @@ -88693,7 +23979,7 @@ L100: declared in the calling (sub) program. LDVT must be at least 1. If NCVT is nonzero LDVT must also be at least N. - U (input/output) REAL array, dimension (LDU, N) + U (input/output) DOUBLE PRECISION array, dimension (LDU, N) On entry, contains a matrix which on exit has been postmultiplied by Q, dimension NRU-by-N if SQRE = 0 and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). @@ -88703,7 +23989,7 @@ L100: declared in the calling (sub) program. LDU must be at least max( 1, NRU ) . - C (input/output) REAL array, dimension (LDC, NCC) + C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) On entry, contains an N-by-NCC matrix which on exit has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). @@ -88713,7 +23999,7 @@ L100: declared in the calling (sub) program. LDC must be at least 1. If NCC is nonzero, LDC must also be at least N. - WORK (workspace) REAL array, dimension (4*N) + WORK (workspace) DOUBLE PRECISION array, dimension (4*N) Workspace. Only referenced if one of NCVT, NRU, or NCC is nonzero, and if N is at least 2. 
@@ -88761,7 +24047,7 @@ L100: } if (iuplo == 0) { *info = -1; - } else if ((*sqre < 0) || (*sqre > 1)) { + } else if (*sqre < 0 || *sqre > 1) { *info = -2; } else if (*n < 0) { *info = -3; @@ -88771,17 +24057,16 @@ L100: *info = -5; } else if (*ncc < 0) { *info = -6; - } else if ((*ncvt == 0 && *ldvt < 1) || (*ncvt > 0 && *ldvt < max(1,*n))) - { + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { *info = -10; } else if (*ldu < max(1,*nru)) { *info = -12; - } else if ((*ncc == 0 && *ldc < 1) || (*ncc > 0 && *ldc < max(1,*n))) { + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { *info = -14; } if (*info != 0) { i__1 = -(*info); - xerbla_("SLASDQ", &i__1); + xerbla_("DLASDQ", &i__1); return 0; } if (*n == 0) { @@ -88790,7 +24075,7 @@ L100: /* ROTATE is true if any singular vectors desired, false otherwise */ - rotate = ((*ncvt > 0) || (*nru > 0)) || (*ncc > 0); + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; np1 = *n + 1; sqre1 = *sqre; @@ -88802,7 +24087,7 @@ L100: if (iuplo == 1 && sqre1 == 1) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { - slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; @@ -88812,9 +24097,9 @@ L100: } /* L10: */ } - slartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); d__[*n] = r__; - e[*n] = 0.f; + e[*n] = 0.; if (rotate) { work[*n] = cs; work[*n + *n] = sn; @@ -88825,7 +24110,7 @@ L100: /* Update singular vectors if desired. 
*/ if (*ncvt > 0) { - slasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ + dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ vt_offset], ldvt); } } @@ -88838,7 +24123,7 @@ L100: if (iuplo == 2) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { - slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; @@ -88855,7 +24140,7 @@ L100: */ if (sqre1 == 1) { - slartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); d__[*n] = r__; if (rotate) { work[*n] = cs; @@ -88867,30 +24152,30 @@ L100: if (*nru > 0) { if (sqre1 == 0) { - slasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ + dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ u_offset], ldu); } else { - slasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ + dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ u_offset], ldu); } } if (*ncc > 0) { if (sqre1 == 0) { - slasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ + dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ c_offset], ldc); } else { - slasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ + dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ c_offset], ldc); } } } /* - Call SBDSQR to compute the SVD of the reduced real + Call DBDSQR to compute the SVD of the reduced real N-by-N upper bidiagonal matrix. 
*/ - sbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ + dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ u_offset], ldu, &c__[c_offset], ldc, &work[1], info); /* @@ -88920,15 +24205,15 @@ L100: d__[isub] = d__[i__]; d__[i__] = smin; if (*ncvt > 0) { - sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], + dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], ldvt); } if (*nru > 0) { - sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] + dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] , &c__1); } if (*ncc > 0) { - sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) + dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) ; } } @@ -88937,11 +24222,11 @@ L100: return 0; -/* End of SLASDQ */ +/* End of DLASDQ */ -} /* slasdq_ */ +} /* dlasdq_ */ -/* Subroutine */ int slasdt_(integer *n, integer *lvl, integer *nd, integer * +/* Subroutine */ int dlasdt_(integer *n, integer *lvl, integer *nd, integer * inode, integer *ndiml, integer *ndimr, integer *msub) { /* System generated locals */ @@ -88952,7 +24237,7 @@ L100: /* Local variables */ static integer i__, il, ir, maxn; - static real temp; + static doublereal temp; static integer nlvl, llst, ncrnt; @@ -88960,13 +24245,13 @@ L100: -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + June 30, 1999 Purpose ======= - SLASDT creates a tree of subproblems for bidiagonal divide and + DLASDT creates a tree of subproblems for bidiagonal divide and conquer. 
Arguments @@ -89015,7 +24300,7 @@ L100: /* Function Body */ maxn = max(1,*n); - temp = log((real) maxn / (real) (*msub + 1)) / log(2.f); + temp = log((doublereal) maxn / (doublereal) (*msub + 1)) / log(2.); *lvl = (integer) temp + 1; i__ = *n / 2; @@ -89049,16 +24334,16 @@ L100: llst <<= 1; /* L20: */ } - *nd = ((llst) << (1)) - 1; + *nd = (llst << 1) - 1; return 0; -/* End of SLASDT */ +/* End of DLASDT */ -} /* slasdt_ */ +} /* dlasdt_ */ -/* Subroutine */ int slaset_(char *uplo, integer *m, integer *n, real *alpha, - real *beta, real *a, integer *lda) +/* Subroutine */ int dlaset_(char *uplo, integer *m, integer *n, doublereal * + alpha, doublereal *beta, doublereal *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; @@ -89078,7 +24363,7 @@ L100: Purpose ======= - SLASET initializes an m-by-n matrix A to BETA on the diagonal and + DLASET initializes an m-by-n matrix A to BETA on the diagonal and ALPHA on the offdiagonals. Arguments @@ -89098,13 +24383,13 @@ L100: N (input) INTEGER The number of columns of the matrix A. N >= 0. - ALPHA (input) REAL + ALPHA (input) DOUBLE PRECISION The constant to which the offdiagonal elements are to be set. - BETA (input) REAL + BETA (input) DOUBLE PRECISION The constant to which the diagonal elements are to be set. 
- A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On exit, the leading m-by-n submatrix of A is set as follows: if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, @@ -89187,36 +24472,39 @@ L100: return 0; -/* End of SLASET */ +/* End of DLASET */ -} /* slaset_ */ +} /* dlaset_ */ -/* Subroutine */ int slasq1_(integer *n, real *d__, real *e, real *work, - integer *info) +/* Subroutine */ int dlasq1_(integer *n, doublereal *d__, doublereal *e, + doublereal *work, integer *info) { /* System generated locals */ integer i__1, i__2; - real r__1, r__2, r__3; + doublereal d__1, d__2, d__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer i__; - static real eps; - extern /* Subroutine */ int slas2_(real *, real *, real *, real *, real *) - ; - static real scale; + static doublereal eps; + extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + static doublereal scale; static integer iinfo; - static real sigmn, sigmx; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *), slasq2_(integer *, real *, integer *); - extern doublereal slamch_(char *); - static real safmin; - extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( - char *, integer *, integer *, real *, real *, integer *, integer * - , real *, integer *, integer *), slasrt_(char *, integer * - , real *, integer *); + static doublereal sigmn; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static doublereal sigmx; + extern /* Subroutine */ int dlasq2_(integer *, doublereal *, integer *); + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + static doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *), dlasrt_( + char *, integer *, doublereal 
*, integer *); /* @@ -89229,7 +24517,7 @@ L100: Purpose ======= - SLASQ1 computes the singular values of a real N-by-N bidiagonal + DLASQ1 computes the singular values of a real N-by-N bidiagonal matrix with diagonal D and off-diagonal E. The singular values are computed to high relative accuracy, in the absence of denormalization, underflow and overflow. The algorithm was first @@ -89248,17 +24536,17 @@ L100: N (input) INTEGER The number of rows and columns in the matrix. N >= 0. - D (input/output) REAL array, dimension (N) + D (input/output) DOUBLE PRECISION array, dimension (N) On entry, D contains the diagonal elements of the bidiagonal matrix whose SVD is desired. On normal exit, D contains the singular values in decreasing order. - E (input/output) REAL array, dimension (N) + E (input/output) DOUBLE PRECISION array, dimension (N) On entry, elements E(1:N-1) contain the off-diagonal elements of the bidiagonal matrix whose SVD is desired. On exit, E is overwritten. - WORK (workspace) REAL array, dimension (4*N) + WORK (workspace) DOUBLE PRECISION array, dimension (4*N) INFO (output) INTEGER = 0: successful exit @@ -89284,15 +24572,15 @@ L100: if (*n < 0) { *info = -2; i__1 = -(*info); - xerbla_("SLASQ1", &i__1); + xerbla_("DLASQ1", &i__1); return 0; } else if (*n == 0) { return 0; } else if (*n == 1) { - d__[1] = dabs(d__[1]); + d__[1] = abs(d__[1]); return 0; } else if (*n == 2) { - slas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); + dlas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); d__[1] = sigmx; d__[2] = sigmn; return 0; @@ -89300,29 +24588,29 @@ L100: /* Estimate the largest singular value. 
*/ - sigmx = 0.f; + sigmx = 0.; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = (r__1 = d__[i__], dabs(r__1)); + d__[i__] = (d__1 = d__[i__], abs(d__1)); /* Computing MAX */ - r__2 = sigmx, r__3 = (r__1 = e[i__], dabs(r__1)); - sigmx = dmax(r__2,r__3); + d__2 = sigmx, d__3 = (d__1 = e[i__], abs(d__1)); + sigmx = max(d__2,d__3); /* L10: */ } - d__[*n] = (r__1 = d__[*n], dabs(r__1)); + d__[*n] = (d__1 = d__[*n], abs(d__1)); /* Early return if SIGMX is zero (matrix is already diagonal). */ - if (sigmx == 0.f) { - slasrt_("D", n, &d__[1], &iinfo); + if (sigmx == 0.) { + dlasrt_("D", n, &d__[1], &iinfo); return 0; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ - r__1 = sigmx, r__2 = d__[i__]; - sigmx = dmax(r__1,r__2); + d__1 = sigmx, d__2 = d__[i__]; + sigmx = max(d__1,d__2); /* L20: */ } @@ -89331,29 +24619,29 @@ L100: input data makes scaling by a power of the radix pointless). */ - eps = slamch_("Precision"); - safmin = slamch_("Safe minimum"); + eps = PRECISION; + safmin = SAFEMINIMUM; scale = sqrt(eps / safmin); - scopy_(n, &d__[1], &c__1, &work[1], &c__2); + dcopy_(n, &d__[1], &c__1, &work[1], &c__2); i__1 = *n - 1; - scopy_(&i__1, &e[1], &c__1, &work[2], &c__2); - i__1 = ((*n) << (1)) - 1; - i__2 = ((*n) << (1)) - 1; - slascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, + dcopy_(&i__1, &e[1], &c__1, &work[2], &c__2); + i__1 = (*n << 1) - 1; + i__2 = (*n << 1) - 1; + dlascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, &iinfo); /* Compute the q's and e's. 
*/ - i__1 = ((*n) << (1)) - 1; + i__1 = (*n << 1) - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing 2nd power */ - r__1 = work[i__]; - work[i__] = r__1 * r__1; + d__1 = work[i__]; + work[i__] = d__1 * d__1; /* L30: */ } - work[*n * 2] = 0.f; + work[*n * 2] = 0.; - slasq2_(n, &work[1], info); + dlasq2_(n, &work[1], info); if (*info == 0) { i__1 = *n; @@ -89361,51 +24649,52 @@ L100: d__[i__] = sqrt(work[i__]); /* L40: */ } - slascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & + dlascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & iinfo); } return 0; -/* End of SLASQ1 */ +/* End of DLASQ1 */ -} /* slasq1_ */ +} /* dlasq1_ */ -/* Subroutine */ int slasq2_(integer *n, real *z__, integer *info) +/* Subroutine */ int dlasq2_(integer *n, doublereal *z__, integer *info) { /* System generated locals */ integer i__1, i__2, i__3; - real r__1, r__2; + doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static real d__, e; + static doublereal d__, e; static integer k; - static real s, t; + static doublereal s, t; static integer i0, i4, n0, pp; - static real eps, tol; + static doublereal eps, tol; static integer ipn4; - static real tol2; + static doublereal tol2; static logical ieee; static integer nbig; - static real dmin__, emin, emax; + static doublereal dmin__, emin, emax; static integer ndiv, iter; - static real qmin, temp, qmax, zmax; + static doublereal qmin, temp, qmax, zmax; static integer splt, nfail; - static real desig, trace, sigma; + static doublereal desig, trace, sigma; static integer iinfo; - extern /* Subroutine */ int slasq3_(integer *, integer *, real *, integer - *, real *, real *, real *, real *, integer *, integer *, integer * - , logical *); - extern doublereal slamch_(char *); + extern /* Subroutine */ int dlasq3_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *, logical *); + static integer 
iwhila, iwhilb; - static real oldemn, safmin; + static doublereal oldemn, safmin; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); /* @@ -89418,7 +24707,7 @@ L100: Purpose ======= - SLASQ2 computes all the eigenvalues of the symmetric positive + DLASQ2 computes all the eigenvalues of the symmetric positive definite tridiagonal matrix associated with the qd array Z to high relative accuracy are computed to high relative accuracy, in the absence of denormalization, underflow and overflow. @@ -89429,10 +24718,10 @@ L100: Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the symmetric tridiagonal to which it is similar. - Note : SLASQ2 defines a logical variable, IEEE, which is true + Note : DLASQ2 defines a logical variable, IEEE, which is true on machines which follow ieee-754 floating-point standard in their handling of infinities and NaNs, and false otherwise. This variable - is passed to SLASQ3. + is passed to DLASQ3. Arguments ========= @@ -89440,7 +24729,7 @@ L100: N (input) INTEGER The number of rows and columns in the matrix. N >= 0. - Z (workspace) REAL array, dimension ( 4*N ) + Z (workspace) DOUBLE PRECISION array, dimension ( 4*N ) On entry Z holds the qd array. On exit, entries 1 to N hold the eigenvalues in decreasing order, Z( 2*N+1 ) holds the trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If @@ -89471,7 +24760,7 @@ L100: Test the input arguments. 
- (in case SLASQ2 is not called by SLASQ1) + (in case DLASQ2 is not called by DLASQ1) */ /* Parameter adjustments */ @@ -89479,16 +24768,16 @@ L100: /* Function Body */ *info = 0; - eps = slamch_("Precision"); - safmin = slamch_("Safe minimum"); - tol = eps * 100.f; + eps = PRECISION; + safmin = SAFEMINIMUM; + tol = eps * 100.; /* Computing 2nd power */ - r__1 = tol; - tol2 = r__1 * r__1; + d__1 = tol; + tol2 = d__1 * d__1; if (*n < 0) { *info = -1; - xerbla_("SLASQ2", &c__1); + xerbla_("DLASQ2", &c__1); return 0; } else if (*n == 0) { return 0; @@ -89496,18 +24785,18 @@ L100: /* 1-by-1 case. */ - if (z__[1] < 0.f) { + if (z__[1] < 0.) { *info = -201; - xerbla_("SLASQ2", &c__2); + xerbla_("DLASQ2", &c__2); } return 0; } else if (*n == 2) { /* 2-by-2 case. */ - if ((z__[2] < 0.f) || (z__[3] < 0.f)) { + if (z__[2] < 0. || z__[3] < 0.) { *info = -2; - xerbla_("SLASQ2", &c__2); + xerbla_("DLASQ2", &c__2); return 0; } else if (z__[3] > z__[1]) { d__ = z__[3]; @@ -89516,10 +24805,10 @@ L100: } z__[5] = z__[1] + z__[2] + z__[3]; if (z__[2] > z__[3] * tol2) { - t = (z__[1] - z__[3] + z__[2]) * .5f; + t = (z__[1] - z__[3] + z__[2]) * .5; s = z__[3] * (z__[2] / t); if (s <= t) { - s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.f) + 1.f))); + s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.))); } else { s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); } @@ -89534,58 +24823,58 @@ L100: /* Check for negative data and compute sums of q's and e's. */ - z__[*n * 2] = 0.f; + z__[*n * 2] = 0.; emin = z__[2]; - qmax = 0.f; - zmax = 0.f; - d__ = 0.f; - e = 0.f; + qmax = 0.; + zmax = 0.; + d__ = 0.; + e = 0.; - i__1 = (*n - 1) << (1); + i__1 = *n - 1 << 1; for (k = 1; k <= i__1; k += 2) { - if (z__[k] < 0.f) { + if (z__[k] < 0.) { *info = -(k + 200); - xerbla_("SLASQ2", &c__2); + xerbla_("DLASQ2", &c__2); return 0; - } else if (z__[k + 1] < 0.f) { + } else if (z__[k + 1] < 0.) 
{ *info = -(k + 201); - xerbla_("SLASQ2", &c__2); + xerbla_("DLASQ2", &c__2); return 0; } d__ += z__[k]; e += z__[k + 1]; /* Computing MAX */ - r__1 = qmax, r__2 = z__[k]; - qmax = dmax(r__1,r__2); + d__1 = qmax, d__2 = z__[k]; + qmax = max(d__1,d__2); /* Computing MIN */ - r__1 = emin, r__2 = z__[k + 1]; - emin = dmin(r__1,r__2); + d__1 = emin, d__2 = z__[k + 1]; + emin = min(d__1,d__2); /* Computing MAX */ - r__1 = max(qmax,zmax), r__2 = z__[k + 1]; - zmax = dmax(r__1,r__2); + d__1 = max(qmax,zmax), d__2 = z__[k + 1]; + zmax = max(d__1,d__2); /* L10: */ } - if (z__[((*n) << (1)) - 1] < 0.f) { - *info = -(((*n) << (1)) + 199); - xerbla_("SLASQ2", &c__2); + if (z__[(*n << 1) - 1] < 0.) { + *info = -((*n << 1) + 199); + xerbla_("DLASQ2", &c__2); return 0; } - d__ += z__[((*n) << (1)) - 1]; + d__ += z__[(*n << 1) - 1]; /* Computing MAX */ - r__1 = qmax, r__2 = z__[((*n) << (1)) - 1]; - qmax = dmax(r__1,r__2); - zmax = dmax(qmax,zmax); + d__1 = qmax, d__2 = z__[(*n << 1) - 1]; + qmax = max(d__1,d__2); + zmax = max(qmax,zmax); /* Check for diagonality. */ - if (e == 0.f) { + if (e == 0.) { i__1 = *n; for (k = 2; k <= i__1; ++k) { - z__[k] = z__[((k) << (1)) - 1]; + z__[k] = z__[(k << 1) - 1]; /* L20: */ } - slasrt_("D", n, &z__[1], &iinfo); - z__[((*n) << (1)) - 1] = d__; + dlasrt_("D", n, &z__[1], &iinfo); + z__[(*n << 1) - 1] = d__; return 0; } @@ -89593,24 +24882,24 @@ L100: /* Check for zero data. */ - if (trace == 0.f) { - z__[((*n) << (1)) - 1] = 0.f; + if (trace == 0.) { + z__[(*n << 1) - 1] = 0.; return 0; } /* Check whether the machine is IEEE conformable. */ - ieee = ilaenv_(&c__10, "SLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen) - 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "SLASQ2", "N", &c__1, &c__2, + ieee = ilaenv_(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen) + 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1; /* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). 
*/ - for (k = (*n) << (1); k >= 2; k += -2) { - z__[k * 2] = 0.f; - z__[((k) << (1)) - 1] = z__[k]; - z__[((k) << (1)) - 2] = 0.f; - z__[((k) << (1)) - 3] = z__[k - 1]; + for (k = *n << 1; k >= 2; k += -2) { + z__[k * 2] = 0.; + z__[(k << 1) - 1] = z__[k]; + z__[(k << 1) - 2] = 0.; + z__[(k << 1) - 3] = z__[k - 1]; /* L30: */ } @@ -89619,10 +24908,10 @@ L100: /* Reverse the qd-array, if warranted. */ - if (z__[((i0) << (2)) - 3] * 1.5f < z__[((n0) << (2)) - 3]) { - ipn4 = (i0 + n0) << (2); - i__1 = (i0 + n0 - 1) << (1); - for (i4 = (i0) << (2); i4 <= i__1; i4 += 4) { + if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) { + ipn4 = i0 + n0 << 2; + i__1 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { temp = z__[i4 - 3]; z__[i4 - 3] = z__[ipn4 - i4 - 3]; z__[ipn4 - i4 - 3] = temp; @@ -89639,11 +24928,11 @@ L100: for (k = 1; k <= 2; ++k) { - d__ = z__[((n0) << (2)) + pp - 3]; - i__1 = ((i0) << (2)) + pp; - for (i4 = ((n0 - 1) << (2)) + pp; i4 >= i__1; i4 += -4) { + d__ = z__[(n0 << 2) + pp - 3]; + i__1 = (i0 << 2) + pp; + for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { if (z__[i4 - 1] <= tol2 * d__) { - z__[i4 - 1] = -0.f; + z__[i4 - 1] = -0.; d__ = z__[i4 - 3]; } else { d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); @@ -89653,41 +24942,41 @@ L100: /* dqd maps Z to ZZ plus Li's test. 
*/ - emin = z__[((i0) << (2)) + pp + 1]; - d__ = z__[((i0) << (2)) + pp - 3]; - i__1 = ((n0 - 1) << (2)) + pp; - for (i4 = ((i0) << (2)) + pp; i4 <= i__1; i4 += 4) { - z__[i4 - ((pp) << (1)) - 2] = d__ + z__[i4 - 1]; + emin = z__[(i0 << 2) + pp + 1]; + d__ = z__[(i0 << 2) + pp - 3]; + i__1 = (n0 - 1 << 2) + pp; + for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { + z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; if (z__[i4 - 1] <= tol2 * d__) { - z__[i4 - 1] = -0.f; - z__[i4 - ((pp) << (1)) - 2] = d__; - z__[i4 - ((pp) << (1))] = 0.f; + z__[i4 - 1] = -0.; + z__[i4 - (pp << 1) - 2] = d__; + z__[i4 - (pp << 1)] = 0.; d__ = z__[i4 + 1]; - } else if (safmin * z__[i4 + 1] < z__[i4 - ((pp) << (1)) - 2] && - safmin * z__[i4 - ((pp) << (1)) - 2] < z__[i4 + 1]) { - temp = z__[i4 + 1] / z__[i4 - ((pp) << (1)) - 2]; - z__[i4 - ((pp) << (1))] = z__[i4 - 1] * temp; + } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && + safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { + temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; + z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; d__ *= temp; } else { - z__[i4 - ((pp) << (1))] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - - ((pp) << (1)) - 2]); - d__ = z__[i4 + 1] * (d__ / z__[i4 - ((pp) << (1)) - 2]); + z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( + pp << 1) - 2]); + d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); } /* Computing MIN */ - r__1 = emin, r__2 = z__[i4 - ((pp) << (1))]; - emin = dmin(r__1,r__2); + d__1 = emin, d__2 = z__[i4 - (pp << 1)]; + emin = min(d__1,d__2); /* L60: */ } - z__[((n0) << (2)) - pp - 2] = d__; + z__[(n0 << 2) - pp - 2] = d__; /* Now find qmax. 
*/ - qmax = z__[((i0) << (2)) - pp - 2]; - i__1 = ((n0) << (2)) - pp - 2; - for (i4 = ((i0) << (2)) - pp + 2; i4 <= i__1; i4 += 4) { + qmax = z__[(i0 << 2) - pp - 2]; + i__1 = (n0 << 2) - pp - 2; + for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { /* Computing MAX */ - r__1 = qmax, r__2 = z__[i4]; - qmax = dmax(r__1,r__2); + d__1 = qmax, d__2 = z__[i4]; + qmax = max(d__1,d__2); /* L70: */ } @@ -89699,7 +24988,7 @@ L100: iter = 2; nfail = 0; - ndiv = (n0 - i0) << (1); + ndiv = n0 - i0 << 1; i__1 = *n + 1; for (iwhila = 1; iwhila <= i__1; ++iwhila) { @@ -89714,13 +25003,13 @@ L100: splits from the rest of the array, but is negated. */ - desig = 0.f; + desig = 0.; if (n0 == *n) { - sigma = 0.f; + sigma = 0.; } else { - sigma = -z__[((n0) << (2)) - 1]; + sigma = -z__[(n0 << 2) - 1]; } - if (sigma < 0.f) { + if (sigma < 0.) { *info = 1; return 0; } @@ -89730,32 +25019,32 @@ L100: EMIN. Find Gershgorin-type bound if Q's much greater than E's. */ - emax = 0.f; + emax = 0.; if (n0 > i0) { - emin = (r__1 = z__[((n0) << (2)) - 5], dabs(r__1)); + emin = (d__1 = z__[(n0 << 2) - 5], abs(d__1)); } else { - emin = 0.f; + emin = 0.; } - qmin = z__[((n0) << (2)) - 3]; + qmin = z__[(n0 << 2) - 3]; qmax = qmin; - for (i4 = (n0) << (2); i4 >= 8; i4 += -4) { - if (z__[i4 - 5] <= 0.f) { + for (i4 = n0 << 2; i4 >= 8; i4 += -4) { + if (z__[i4 - 5] <= 0.) { goto L100; } - if (qmin >= emax * 4.f) { + if (qmin >= emax * 4.) 
{ /* Computing MIN */ - r__1 = qmin, r__2 = z__[i4 - 3]; - qmin = dmin(r__1,r__2); + d__1 = qmin, d__2 = z__[i4 - 3]; + qmin = min(d__1,d__2); /* Computing MAX */ - r__1 = emax, r__2 = z__[i4 - 5]; - emax = dmax(r__1,r__2); + d__1 = emax, d__2 = z__[i4 - 5]; + emax = max(d__1,d__2); } /* Computing MAX */ - r__1 = qmax, r__2 = z__[i4 - 7] + z__[i4 - 5]; - qmax = dmax(r__1,r__2); + d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5]; + qmax = max(d__1,d__2); /* Computing MIN */ - r__1 = emin, r__2 = z__[i4 - 5]; - emin = dmin(r__1,r__2); + d__1 = emin, d__2 = z__[i4 - 5]; + emin = min(d__1,d__2); /* L90: */ } i4 = 4; @@ -89763,17 +25052,17 @@ L100: L100: i0 = i4 / 4; -/* Store EMIN for passing to SLASQ3. */ +/* Store EMIN for passing to DLASQ3. */ - z__[((n0) << (2)) - 1] = emin; + z__[(n0 << 2) - 1] = emin; /* Put -(initial shift) into DMIN. Computing MAX */ - r__1 = 0.f, r__2 = qmin - sqrt(qmin) * 2.f * sqrt(emax); - dmin__ = -dmax(r__1,r__2); + d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax); + dmin__ = -max(d__1,d__2); /* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. */ @@ -89788,7 +25077,7 @@ L100: /* While submatrix unfinished take a good dqds step. */ - slasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & + dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & nfail, &iter, &ndiv, &ieee); pp = 1 - pp; @@ -89796,35 +25085,35 @@ L100: /* When EMIN is very small check for splits. 
*/ if (pp == 0 && n0 - i0 >= 3) { - if ((z__[n0 * 4] <= tol2 * qmax) || (z__[((n0) << (2)) - 1] <= - tol2 * sigma)) { + if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * + sigma) { splt = i0 - 1; - qmax = z__[((i0) << (2)) - 3]; - emin = z__[((i0) << (2)) - 1]; + qmax = z__[(i0 << 2) - 3]; + emin = z__[(i0 << 2) - 1]; oldemn = z__[i0 * 4]; - i__3 = (n0 - 3) << (2); - for (i4 = (i0) << (2); i4 <= i__3; i4 += 4) { - if ((z__[i4] <= tol2 * z__[i4 - 3]) || (z__[i4 - 1] <= - tol2 * sigma)) { + i__3 = n0 - 3 << 2; + for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { + if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= + tol2 * sigma) { z__[i4 - 1] = -sigma; splt = i4 / 4; - qmax = 0.f; + qmax = 0.; emin = z__[i4 + 3]; oldemn = z__[i4 + 4]; } else { /* Computing MAX */ - r__1 = qmax, r__2 = z__[i4 + 1]; - qmax = dmax(r__1,r__2); + d__1 = qmax, d__2 = z__[i4 + 1]; + qmax = max(d__1,d__2); /* Computing MIN */ - r__1 = emin, r__2 = z__[i4 - 1]; - emin = dmin(r__1,r__2); + d__1 = emin, d__2 = z__[i4 - 1]; + emin = min(d__1,d__2); /* Computing MIN */ - r__1 = oldemn, r__2 = z__[i4]; - oldemn = dmin(r__1,r__2); + d__1 = oldemn, d__2 = z__[i4]; + oldemn = min(d__1,d__2); } /* L110: */ } - z__[((n0) << (2)) - 1] = emin; + z__[(n0 << 2) - 1] = emin; z__[n0 * 4] = oldemn; i0 = splt + 1; } @@ -89855,15 +25144,15 @@ L150: i__1 = *n; for (k = 2; k <= i__1; ++k) { - z__[k] = z__[((k) << (2)) - 3]; + z__[k] = z__[(k << 2) - 3]; /* L160: */ } /* Sort and compute sum of eigenvalues. */ - slasrt_("D", n, &z__[1], &iinfo); + dlasrt_("D", n, &z__[1], &iinfo); - e = 0.f; + e = 0.; for (k = *n; k >= 1; --k) { e += z__[k]; /* L170: */ @@ -89871,54 +25160,58 @@ L150: /* Store trace, sum(eigenvalues) and information on performance. 
*/ - z__[((*n) << (1)) + 1] = trace; - z__[((*n) << (1)) + 2] = e; - z__[((*n) << (1)) + 3] = (real) iter; + z__[(*n << 1) + 1] = trace; + z__[(*n << 1) + 2] = e; + z__[(*n << 1) + 3] = (doublereal) iter; /* Computing 2nd power */ i__1 = *n; - z__[((*n) << (1)) + 4] = (real) ndiv / (real) (i__1 * i__1); - z__[((*n) << (1)) + 5] = nfail * 100.f / (real) iter; + z__[(*n << 1) + 4] = (doublereal) ndiv / (doublereal) (i__1 * i__1); + z__[(*n << 1) + 5] = nfail * 100. / (doublereal) iter; return 0; -/* End of SLASQ2 */ +/* End of DLASQ2 */ -} /* slasq2_ */ +} /* dlasq2_ */ -/* Subroutine */ int slasq3_(integer *i0, integer *n0, real *z__, integer *pp, - real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail, - integer *iter, integer *ndiv, logical *ieee) +/* Subroutine */ int dlasq3_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, + doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, + logical *ieee) { /* Initialized data */ static integer ttype = 0; - static real dmin1 = 0.f; - static real dmin2 = 0.f; - static real dn = 0.f; - static real dn1 = 0.f; - static real dn2 = 0.f; - static real tau = 0.f; + static doublereal dmin1 = 0.; + static doublereal dmin2 = 0.; + static doublereal dn = 0.; + static doublereal dn1 = 0.; + static doublereal dn2 = 0.; + static doublereal tau = 0.; /* System generated locals */ integer i__1; - real r__1, r__2; + doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static real s, t; + static doublereal s, t; static integer j4, nn; - static real eps, tol; + static doublereal eps, tol; static integer n0in, ipn4; - static real tol2, temp; - extern /* Subroutine */ int slasq4_(integer *, integer *, real *, integer - *, integer *, real *, real *, real *, real *, real *, real *, - real *, integer *), slasq5_(integer *, integer *, real *, integer - *, real *, real *, real *, real *, real *, real *, real *, - 
logical *), slasq6_(integer *, integer *, real *, integer *, real - *, real *, real *, real *, real *, real *); - extern doublereal slamch_(char *); - static real safmin; + static doublereal tol2, temp; + extern /* Subroutine */ int dlasq4_(integer *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *) + , dlasq5_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, logical *), dlasq6_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + + static doublereal safmin; /* @@ -89931,7 +25224,7 @@ L150: Purpose ======= - SLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. + DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. In case of failure it changes shifts, and tries again until output is positive. @@ -89944,22 +25237,22 @@ L150: N0 (input) INTEGER Last index. - Z (input) REAL array, dimension ( 4*N ) + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) Z holds the qd array. PP (input) INTEGER PP=0 for ping, PP=1 for pong. - DMIN (output) REAL + DMIN (output) DOUBLE PRECISION Minimum value of d. - SIGMA (output) REAL + SIGMA (output) DOUBLE PRECISION Sum of shifts used in current segment. - DESIG (input/output) REAL + DESIG (input/output) DOUBLE PRECISION Lower order part of SIGMA - QMAX (input) REAL + QMAX (input) DOUBLE PRECISION Maximum value of q. NFAIL (output) INTEGER @@ -89975,7 +25268,7 @@ L150: Shift type. IEEE (input) LOGICAL - Flag for IEEE or non IEEE arithmetic (passed to SLASQ5). + Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). 
===================================================================== */ @@ -89986,12 +25279,12 @@ L150: /* Function Body */ n0in = *n0; - eps = slamch_("Precision"); - safmin = slamch_("Safe minimum"); - tol = eps * 100.f; + eps = PRECISION; + safmin = SAFEMINIMUM; + tol = eps * 100.; /* Computing 2nd power */ - r__1 = tol; - tol2 = r__1 * r__1; + d__1 = tol; + tol2 = d__1 * d__1; /* Check for deflation. */ @@ -90003,21 +25296,21 @@ L10: if (*n0 == *i0) { goto L20; } - nn = ((*n0) << (2)) + *pp; + nn = (*n0 << 2) + *pp; if (*n0 == *i0 + 1) { goto L40; } /* Check whether E(N0-1) is negligible, 1 eigenvalue. */ - if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - ((*pp) << (1)) - - 4] > tol2 * z__[nn - 7]) { + if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - + 4] > tol2 * z__[nn - 7]) { goto L30; } L20: - z__[((*n0) << (2)) - 3] = z__[((*n0) << (2)) + *pp - 3] + *sigma; + z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; --(*n0); goto L10; @@ -90025,8 +25318,8 @@ L20: L30: - if (z__[nn - 9] > tol2 * *sigma && z__[nn - ((*pp) << (1)) - 8] > tol2 * - z__[nn - 11]) { + if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ + nn - 11]) { goto L50; } @@ -90038,10 +25331,10 @@ L40: z__[nn - 7] = s; } if (z__[nn - 5] > z__[nn - 3] * tol2) { - t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5f; + t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5; s = z__[nn - 3] * (z__[nn - 5] / t); if (s <= t) { - s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.f) + 1.f))); + s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) 
+ 1.))); } else { s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); } @@ -90049,8 +25342,8 @@ L40: z__[nn - 3] *= z__[nn - 7] / t; z__[nn - 7] = t; } - z__[((*n0) << (2)) - 7] = z__[nn - 7] + *sigma; - z__[((*n0) << (2)) - 3] = z__[nn - 3] + *sigma; + z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; + z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; *n0 += -2; goto L10; @@ -90058,12 +25351,11 @@ L50: /* Reverse the qd-array, if warranted. */ - if ((*dmin__ <= 0.f) || (*n0 < n0in)) { - if (z__[((*i0) << (2)) + *pp - 3] * 1.5f < z__[((*n0) << (2)) + *pp - - 3]) { - ipn4 = (*i0 + *n0) << (2); - i__1 = (*i0 + *n0 - 1) << (1); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { + if (*dmin__ <= 0. || *n0 < n0in) { + if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) { + ipn4 = *i0 + *n0 << 2; + i__1 = *i0 + *n0 - 1 << 1; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { temp = z__[j4 - 3]; z__[j4 - 3] = z__[ipn4 - j4 - 3]; z__[ipn4 - j4 - 3] = temp; @@ -90079,27 +25371,25 @@ L50: /* L60: */ } if (*n0 - *i0 <= 4) { - z__[((*n0) << (2)) + *pp - 1] = z__[((*i0) << (2)) + *pp - 1]; - z__[((*n0) << (2)) - *pp] = z__[((*i0) << (2)) - *pp]; + z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; + z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; } /* Computing MIN */ - r__1 = dmin2, r__2 = z__[((*n0) << (2)) + *pp - 1]; - dmin2 = dmin(r__1,r__2); + d__1 = dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; + dmin2 = min(d__1,d__2); /* Computing MIN */ - r__1 = z__[((*n0) << (2)) + *pp - 1], r__2 = z__[((*i0) << (2)) + - *pp - 1], r__1 = min(r__1,r__2), r__2 = z__[((*i0) << (2)) - + *pp + 3]; - z__[((*n0) << (2)) + *pp - 1] = dmin(r__1,r__2); + d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1] + , d__1 = min(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3]; + z__[(*n0 << 2) + *pp - 1] = min(d__1,d__2); /* Computing MIN */ - r__1 = z__[((*n0) << (2)) - *pp], r__2 = z__[((*i0) << (2)) - *pp] - , r__1 = min(r__1,r__2), r__2 = z__[((*i0) << (2)) - *pp - + 4]; - z__[((*n0) 
<< (2)) - *pp] = dmin(r__1,r__2); + d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 = + min(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4]; + z__[(*n0 << 2) - *pp] = min(d__1,d__2); /* Computing MAX */ - r__1 = *qmax, r__2 = z__[((*i0) << (2)) + *pp - 3], r__1 = max( - r__1,r__2), r__2 = z__[((*i0) << (2)) + *pp + 1]; - *qmax = dmax(r__1,r__2); - *dmin__ = -0.f; + d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = max(d__1, + d__2), d__2 = z__[(*i0 << 2) + *pp + 1]; + *qmax = max(d__1,d__2); + *dmin__ = -0.; } } @@ -90108,20 +25398,20 @@ L50: Computing MIN */ - r__1 = z__[((*n0) << (2)) + *pp - 1], r__2 = z__[((*n0) << (2)) + *pp - 9] - , r__1 = min(r__1,r__2), r__2 = dmin2 + z__[((*n0) << (2)) - *pp]; - if ((*dmin__ < 0.f) || (safmin * *qmax < dmin(r__1,r__2))) { + d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*n0 << 2) + *pp - 9], d__1 = + min(d__1,d__2), d__2 = dmin2 + z__[(*n0 << 2) - *pp]; + if (*dmin__ < 0. || safmin * *qmax < min(d__1,d__2)) { /* Choose a shift. */ - slasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1, + dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2, &tau, &ttype); /* Call dqds until DMIN > 0. */ L80: - slasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1, + dlasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2, ieee); *ndiv += *n0 - *i0 + 2; @@ -90129,21 +25419,21 @@ L80: /* Check status. */ - if (*dmin__ >= 0.f && dmin1 > 0.f) { + if (*dmin__ >= 0. && dmin1 > 0.) { /* Success. */ goto L100; - } else if (*dmin__ < 0.f && dmin1 > 0.f && z__[((*n0 - 1) << (2)) - * - pp] < tol * (*sigma + dn1) && dabs(dn) < tol * *sigma) { + } else if (*dmin__ < 0. && dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] < + tol * (*sigma + dn1) && abs(dn) < tol * *sigma) { /* Convergence hidden by negative DN. 
*/ - z__[((*n0 - 1) << (2)) - *pp + 2] = 0.f; - *dmin__ = 0.f; + z__[(*n0 - 1 << 2) - *pp + 2] = 0.; + *dmin__ = 0.; goto L100; - } else if (*dmin__ < 0.f) { + } else if (*dmin__ < 0.) { /* TAU too big. Select new TAU and try again. */ @@ -90152,18 +25442,18 @@ L80: /* Failed twice. Play it safe. */ - tau = 0.f; - } else if (dmin1 > 0.f) { + tau = 0.; + } else if (dmin1 > 0.) { /* Late failure. Gives excellent shift. */ - tau = (tau + *dmin__) * (1.f - eps * 2.f); + tau = (tau + *dmin__) * (1. - eps * 2.); ttype += -11; } else { /* Early failure. Divide by 4. */ - tau *= .25f; + tau *= .25; ttype += -12; } goto L80; @@ -90171,7 +25461,7 @@ L80: /* NaN. */ - tau = 0.f; + tau = 0.; goto L80; } else { @@ -90184,10 +25474,10 @@ L80: /* Risk of underflow. */ L90: - slasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2); + dlasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2); *ndiv += *n0 - *i0 + 2; ++(*iter); - tau = 0.f; + tau = 0.; L100: if (tau < *sigma) { @@ -90202,29 +25492,30 @@ L100: return 0; -/* End of SLASQ3 */ +/* End of DLASQ3 */ -} /* slasq3_ */ +} /* dlasq3_ */ -/* Subroutine */ int slasq4_(integer *i0, integer *n0, real *z__, integer *pp, - integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn, - real *dn1, real *dn2, real *tau, integer *ttype) +/* Subroutine */ int dlasq4_(integer *i0, integer *n0, doublereal *z__, + integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, + doublereal *tau, integer *ttype) { /* Initialized data */ - static real g = 0.f; + static doublereal g = 0.; /* System generated locals */ integer i__1; - real r__1, r__2; + doublereal d__1, d__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static real s, a2, b1, b2; + static doublereal s, a2, b1, b2; static integer i4, nn, np; - static real gam, gap1, gap2; + static doublereal gam, gap1, gap2; /* @@ -90237,7 +25528,7 @@ L100: Purpose 
======= - SLASQ4 computes an approximation TAU to the smallest eigenvalue + DLASQ4 computes an approximation TAU to the smallest eigenvalue using values of d from the previous transform. I0 (input) INTEGER @@ -90246,7 +25537,7 @@ L100: N0 (input) INTEGER Last index. - Z (input) REAL array, dimension ( 4*N ) + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) Z holds the qd array. PP (input) INTEGER @@ -90255,25 +25546,25 @@ L100: NOIN (input) INTEGER The value of N0 at start of EIGTEST. - DMIN (input) REAL + DMIN (input) DOUBLE PRECISION Minimum value of d. - DMIN1 (input) REAL + DMIN1 (input) DOUBLE PRECISION Minimum value of d, excluding D( N0 ). - DMIN2 (input) REAL + DMIN2 (input) DOUBLE PRECISION Minimum value of d, excluding D( N0 ) and D( N0-1 ). - DN (input) REAL + DN (input) DOUBLE PRECISION d(N) - DN1 (input) REAL + DN1 (input) DOUBLE PRECISION d(N-1) - DN2 (input) REAL + DN2 (input) DOUBLE PRECISION d(N-2) - TAU (output) REAL + TAU (output) DOUBLE PRECISION This is the shift. TTYPE (output) INTEGER @@ -90296,18 +25587,18 @@ L100: TTYPE records the type of shift. */ - if (*dmin__ <= 0.f) { + if (*dmin__ <= 0.) { *tau = -(*dmin__); *ttype = -1; return 0; } - nn = ((*n0) << (2)) + *pp; + nn = (*n0 << 2) + *pp; if (*n0in == *n0) { /* No eigenvalues deflated. */ - if ((*dmin__ == *dn) || (*dmin__ == *dn1)) { + if (*dmin__ == *dn || *dmin__ == *dn1) { b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); @@ -90316,30 +25607,30 @@ L100: /* Cases 2 and 3. */ if (*dmin__ == *dn && *dmin1 == *dn1) { - gap2 = *dmin2 - a2 - *dmin2 * .25f; - if (gap2 > 0.f && gap2 > b2) { + gap2 = *dmin2 - a2 - *dmin2 * .25; + if (gap2 > 0. && gap2 > b2) { gap1 = a2 - *dn - b2 / gap2 * b2; } else { gap1 = a2 - *dn - (b1 + b2); } - if (gap1 > 0.f && gap1 > b1) { + if (gap1 > 0. 
&& gap1 > b1) { /* Computing MAX */ - r__1 = *dn - b1 / gap1 * b1, r__2 = *dmin__ * .5f; - s = dmax(r__1,r__2); + d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5; + s = max(d__1,d__2); *ttype = -2; } else { - s = 0.f; + s = 0.; if (*dn > b1) { s = *dn - b1; } if (a2 > b1 + b2) { /* Computing MIN */ - r__1 = s, r__2 = a2 - (b1 + b2); - s = dmin(r__1,r__2); + d__1 = s, d__2 = a2 - (b1 + b2); + s = min(d__1,d__2); } /* Computing MAX */ - r__1 = s, r__2 = *dmin__ * .333f; - s = dmax(r__1,r__2); + d__1 = s, d__2 = *dmin__ * .333; + s = max(d__1,d__2); *ttype = -3; } } else { @@ -90347,17 +25638,17 @@ L100: /* Case 4. */ *ttype = -4; - s = *dmin__ * .25f; + s = *dmin__ * .25; if (*dmin__ == *dn) { gam = *dn; - a2 = 0.f; + a2 = 0.; if (z__[nn - 5] > z__[nn - 7]) { return 0; } b2 = z__[nn - 5] / z__[nn - 7]; np = nn - 9; } else { - np = nn - ((*pp) << (1)); + np = nn - (*pp << 1); b2 = z__[np - 2]; gam = *dn1; if (z__[np - 4] > z__[np - 2]) { @@ -90374,9 +25665,9 @@ L100: /* Approximate contribution to norm squared from I < NN-1. */ a2 += b2; - i__1 = ((*i0) << (2)) - 1 + *pp; + i__1 = (*i0 << 2) - 1 + *pp; for (i4 = np; i4 >= i__1; i4 += -4) { - if (b2 == 0.f) { + if (b2 == 0.) { goto L20; } b1 = b2; @@ -90385,18 +25676,18 @@ L100: } b2 *= z__[i4] / z__[i4 - 2]; a2 += b2; - if ((dmax(b2,b1) * 100.f < a2) || (.563f < a2)) { + if (max(b2,b1) * 100. < a2 || .563 < a2) { goto L20; } /* L10: */ } L20: - a2 *= 1.05f; + a2 *= 1.05; /* Rayleigh quotient residual bound. */ - if (a2 < .563f) { - s = gam * (1.f - sqrt(a2)) / (a2 + 1.f); + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); } } } else if (*dmin__ == *dn2) { @@ -90404,27 +25695,27 @@ L20: /* Case 5. */ *ttype = -5; - s = *dmin__ * .25f; + s = *dmin__ * .25; /* Compute contribution to norm squared from I > NN-2. 
*/ - np = nn - ((*pp) << (1)); + np = nn - (*pp << 1); b1 = z__[np - 2]; b2 = z__[np - 6]; gam = *dn2; - if ((z__[np - 8] > b2) || (z__[np - 4] > b1)) { + if (z__[np - 8] > b2 || z__[np - 4] > b1) { return 0; } - a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.f); + a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.); /* Approximate contribution to norm squared from I < NN-2. */ if (*n0 - *i0 > 2) { b2 = z__[nn - 13] / z__[nn - 15]; a2 += b2; - i__1 = ((*i0) << (2)) - 1 + *pp; + i__1 = (*i0 << 2) - 1 + *pp; for (i4 = nn - 17; i4 >= i__1; i4 += -4) { - if (b2 == 0.f) { + if (b2 == 0.) { goto L40; } b1 = b2; @@ -90433,28 +25724,28 @@ L20: } b2 *= z__[i4] / z__[i4 - 2]; a2 += b2; - if ((dmax(b2,b1) * 100.f < a2) || (.563f < a2)) { + if (max(b2,b1) * 100. < a2 || .563 < a2) { goto L40; } /* L30: */ } L40: - a2 *= 1.05f; + a2 *= 1.05; } - if (a2 < .563f) { - s = gam * (1.f - sqrt(a2)) / (a2 + 1.f); + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); } } else { /* Case 6, no information to guide us. */ if (*ttype == -6) { - g += (1.f - g) * .333f; + g += (1. - g) * .333; } else if (*ttype == -18) { - g = .083250000000000005f; + g = .083250000000000005; } else { - g = .25f; + g = .25; } s = g * *dmin__; *ttype = -6; @@ -90469,51 +25760,51 @@ L40: /* Cases 7 and 8. */ *ttype = -7; - s = *dmin1 * .333f; + s = *dmin1 * .333; if (z__[nn - 5] > z__[nn - 7]) { return 0; } b1 = z__[nn - 5] / z__[nn - 7]; b2 = b1; - if (b2 == 0.f) { + if (b2 == 0.) { goto L60; } - i__1 = ((*i0) << (2)) - 1 + *pp; - for (i4 = ((*n0) << (2)) - 9 + *pp; i4 >= i__1; i4 += -4) { + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { a2 = b1; if (z__[i4] > z__[i4 - 2]) { return 0; } b1 *= z__[i4] / z__[i4 - 2]; b2 += b1; - if (dmax(b1,a2) * 100.f < b2) { + if (max(b1,a2) * 100. 
< b2) { goto L60; } /* L50: */ } L60: - b2 = sqrt(b2 * 1.05f); + b2 = sqrt(b2 * 1.05); /* Computing 2nd power */ - r__1 = b2; - a2 = *dmin1 / (r__1 * r__1 + 1.f); - gap2 = *dmin2 * .5f - a2; - if (gap2 > 0.f && gap2 > b2 * a2) { + d__1 = b2; + a2 = *dmin1 / (d__1 * d__1 + 1.); + gap2 = *dmin2 * .5 - a2; + if (gap2 > 0. && gap2 > b2 * a2) { /* Computing MAX */ - r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2); - s = dmax(r__1,r__2); + d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); + s = max(d__1,d__2); } else { /* Computing MAX */ - r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f); - s = dmax(r__1,r__2); + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = max(d__1,d__2); *ttype = -8; } } else { /* Case 9. */ - s = *dmin1 * .25f; + s = *dmin1 * .25; if (*dmin1 == *dn1) { - s = *dmin1 * .5f; + s = *dmin1 * .5; } *ttype = -9; } @@ -90526,76 +25817,77 @@ L60: Cases 10 and 11. */ - if (*dmin2 == *dn2 && z__[nn - 5] * 2.f < z__[nn - 7]) { + if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) { *ttype = -10; - s = *dmin2 * .333f; + s = *dmin2 * .333; if (z__[nn - 5] > z__[nn - 7]) { return 0; } b1 = z__[nn - 5] / z__[nn - 7]; b2 = b1; - if (b2 == 0.f) { + if (b2 == 0.) { goto L80; } - i__1 = ((*i0) << (2)) - 1 + *pp; - for (i4 = ((*n0) << (2)) - 9 + *pp; i4 >= i__1; i4 += -4) { + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { if (z__[i4] > z__[i4 - 2]) { return 0; } b1 *= z__[i4] / z__[i4 - 2]; b2 += b1; - if (b1 * 100.f < b2) { + if (b1 * 100. < b2) { goto L80; } /* L70: */ } L80: - b2 = sqrt(b2 * 1.05f); + b2 = sqrt(b2 * 1.05); /* Computing 2nd power */ - r__1 = b2; - a2 = *dmin2 / (r__1 * r__1 + 1.f); + d__1 = b2; + a2 = *dmin2 / (d__1 * d__1 + 1.); gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ nn - 9]) - a2; - if (gap2 > 0.f && gap2 > b2 * a2) { + if (gap2 > 0. 
&& gap2 > b2 * a2) { /* Computing MAX */ - r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2); - s = dmax(r__1,r__2); + d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); + s = max(d__1,d__2); } else { /* Computing MAX */ - r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f); - s = dmax(r__1,r__2); + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = max(d__1,d__2); } } else { - s = *dmin2 * .25f; + s = *dmin2 * .25; *ttype = -11; } } else if (*n0in > *n0 + 2) { /* Case 12, more than two eigenvalues deflated. No information. */ - s = 0.f; + s = 0.; *ttype = -12; } *tau = s; return 0; -/* End of SLASQ4 */ +/* End of DLASQ4 */ -} /* slasq4_ */ +} /* dlasq4_ */ -/* Subroutine */ int slasq5_(integer *i0, integer *n0, real *z__, integer *pp, - real *tau, real *dmin__, real *dmin1, real *dmin2, real *dn, real * - dnm1, real *dnm2, logical *ieee) +/* Subroutine */ int dlasq5_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2, + logical *ieee) { /* System generated locals */ integer i__1; - real r__1, r__2; + doublereal d__1, d__2; /* Local variables */ - static real d__; + static doublereal d__; static integer j4, j4p2; - static real emin, temp; + static doublereal emin, temp; /* @@ -90608,7 +25900,7 @@ L80: Purpose ======= - SLASQ5 computes one dqds transform in ping-pong form, one + DLASQ5 computes one dqds transform in ping-pong form, one version for IEEE machines another for non IEEE machines. Arguments @@ -90620,32 +25912,32 @@ L80: N0 (input) INTEGER Last index. - Z (input) REAL array, dimension ( 4*N ) + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) Z holds the qd array. EMIN is stored in Z(4*N0) to avoid an extra argument. PP (input) INTEGER PP=0 for ping, PP=1 for pong. - TAU (input) REAL + TAU (input) DOUBLE PRECISION This is the shift. - DMIN (output) REAL + DMIN (output) DOUBLE PRECISION Minimum value of d. 
- DMIN1 (output) REAL + DMIN1 (output) DOUBLE PRECISION Minimum value of d, excluding D( N0 ). - DMIN2 (output) REAL + DMIN2 (output) DOUBLE PRECISION Minimum value of d, excluding D( N0 ) and D( N0-1 ). - DN (output) REAL + DN (output) DOUBLE PRECISION d(N0), the last value of d. - DNM1 (output) REAL + DNM1 (output) DOUBLE PRECISION d(N0-1). - DNM2 (output) REAL + DNM2 (output) DOUBLE PRECISION d(N0-2). IEEE (input) LOGICAL @@ -90663,7 +25955,7 @@ L80: return 0; } - j4 = ((*i0) << (2)) + *pp - 3; + j4 = (*i0 << 2) + *pp - 3; emin = z__[j4 + 4]; d__ = z__[j4] - *tau; *dmin__ = d__; @@ -90674,29 +25966,29 @@ L80: /* Code for IEEE arithmetic. */ if (*pp == 0) { - i__1 = (*n0 - 3) << (2); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 2] = d__ + z__[j4 - 1]; temp = z__[j4 + 1] / z__[j4 - 2]; d__ = d__ * temp - *tau; - *dmin__ = dmin(*dmin__,d__); + *dmin__ = min(*dmin__,d__); z__[j4] = z__[j4 - 1] * temp; /* Computing MIN */ - r__1 = z__[j4]; - emin = dmin(r__1,emin); + d__1 = z__[j4]; + emin = min(d__1,emin); /* L10: */ } } else { - i__1 = (*n0 - 3) << (2); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 3] = d__ + z__[j4]; temp = z__[j4 + 2] / z__[j4 - 3]; d__ = d__ * temp - *tau; - *dmin__ = dmin(*dmin__,d__); + *dmin__ = min(*dmin__,d__); z__[j4 - 1] = z__[j4] * temp; /* Computing MIN */ - r__1 = z__[j4 - 1]; - emin = dmin(r__1,emin); + d__1 = z__[j4 - 1]; + emin = min(d__1,emin); /* L20: */ } } @@ -90705,55 +25997,55 @@ L80: *dnm2 = d__; *dmin2 = *dmin__; - j4 = ((*n0 - 2) << (2)) - *pp; - j4p2 = j4 + ((*pp) << (1)) - 1; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm2 + z__[j4p2]; z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; - *dmin__ = dmin(*dmin__,*dnm1); + *dmin__ = min(*dmin__,*dnm1); *dmin1 = *dmin__; j4 += 4; - 
j4p2 = j4 + ((*pp) << (1)) - 1; + j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm1 + z__[j4p2]; z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; - *dmin__ = dmin(*dmin__,*dn); + *dmin__ = min(*dmin__,*dn); } else { /* Code for non IEEE arithmetic. */ if (*pp == 0) { - i__1 = (*n0 - 3) << (2); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 2] = d__ + z__[j4 - 1]; - if (d__ < 0.f) { + if (d__ < 0.) { return 0; } else { z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; } - *dmin__ = dmin(*dmin__,d__); + *dmin__ = min(*dmin__,d__); /* Computing MIN */ - r__1 = emin, r__2 = z__[j4]; - emin = dmin(r__1,r__2); + d__1 = emin, d__2 = z__[j4]; + emin = min(d__1,d__2); /* L30: */ } } else { - i__1 = (*n0 - 3) << (2); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 3] = d__ + z__[j4]; - if (d__ < 0.f) { + if (d__ < 0.) { return 0; } else { z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; } - *dmin__ = dmin(*dmin__,d__); + *dmin__ = min(*dmin__,d__); /* Computing MIN */ - r__1 = emin, r__2 = z__[j4 - 1]; - emin = dmin(r__1,r__2); + d__1 = emin, d__2 = z__[j4 - 1]; + emin = min(d__1,d__2); /* L40: */ } } @@ -90762,53 +26054,53 @@ L80: *dnm2 = d__; *dmin2 = *dmin__; - j4 = ((*n0 - 2) << (2)) - *pp; - j4p2 = j4 + ((*pp) << (1)) - 1; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm2 + z__[j4p2]; - if (*dnm2 < 0.f) { + if (*dnm2 < 0.) 
{ return 0; } else { z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; } - *dmin__ = dmin(*dmin__,*dnm1); + *dmin__ = min(*dmin__,*dnm1); *dmin1 = *dmin__; j4 += 4; - j4p2 = j4 + ((*pp) << (1)) - 1; + j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm1 + z__[j4p2]; - if (*dnm1 < 0.f) { + if (*dnm1 < 0.) { return 0; } else { z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; } - *dmin__ = dmin(*dmin__,*dn); + *dmin__ = min(*dmin__,*dn); } z__[j4 + 2] = *dn; - z__[((*n0) << (2)) - *pp] = emin; + z__[(*n0 << 2) - *pp] = emin; return 0; -/* End of SLASQ5 */ +/* End of DLASQ5 */ -} /* slasq5_ */ +} /* dlasq5_ */ -/* Subroutine */ int slasq6_(integer *i0, integer *n0, real *z__, integer *pp, - real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real * - dnm2) +/* Subroutine */ int dlasq6_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dnm1, doublereal *dnm2) { /* System generated locals */ integer i__1; - real r__1, r__2; + doublereal d__1, d__2; /* Local variables */ - static real d__; + static doublereal d__; static integer j4, j4p2; - static real emin, temp; - extern doublereal slamch_(char *); - static real safmin; + static doublereal emin, temp; + + static doublereal safmin; /* @@ -90821,7 +26113,7 @@ L80: Purpose ======= - SLASQ6 computes one dqd (shift equal to zero) transform in + DLASQ6 computes one dqd (shift equal to zero) transform in ping-pong form, with protection against underflow and overflow. Arguments @@ -90833,29 +26125,29 @@ L80: N0 (input) INTEGER Last index. - Z (input) REAL array, dimension ( 4*N ) + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) Z holds the qd array. EMIN is stored in Z(4*N0) to avoid an extra argument. PP (input) INTEGER PP=0 for ping, PP=1 for pong. 
- DMIN (output) REAL + DMIN (output) DOUBLE PRECISION Minimum value of d. - DMIN1 (output) REAL + DMIN1 (output) DOUBLE PRECISION Minimum value of d, excluding D( N0 ). - DMIN2 (output) REAL + DMIN2 (output) DOUBLE PRECISION Minimum value of d, excluding D( N0 ) and D( N0-1 ). - DN (output) REAL + DN (output) DOUBLE PRECISION d(N0), the last value of d. - DNM1 (output) REAL + DNM1 (output) DOUBLE PRECISION d(N0-1). - DNM2 (output) REAL + DNM2 (output) DOUBLE PRECISION d(N0-2). ===================================================================== @@ -90870,21 +26162,21 @@ L80: return 0; } - safmin = slamch_("Safe minimum"); - j4 = ((*i0) << (2)) + *pp - 3; + safmin = SAFEMINIMUM; + j4 = (*i0 << 2) + *pp - 3; emin = z__[j4 + 4]; d__ = z__[j4]; *dmin__ = d__; if (*pp == 0) { - i__1 = (*n0 - 3) << (2); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 2] = d__ + z__[j4 - 1]; - if (z__[j4 - 2] == 0.f) { - z__[j4] = 0.f; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; d__ = z__[j4 + 1]; *dmin__ = d__; - emin = 0.f; + emin = 0.; } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 - 2] < z__[j4 + 1]) { temp = z__[j4 + 1] / z__[j4 - 2]; @@ -90894,21 +26186,21 @@ L80: z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); } - *dmin__ = dmin(*dmin__,d__); + *dmin__ = min(*dmin__,d__); /* Computing MIN */ - r__1 = emin, r__2 = z__[j4]; - emin = dmin(r__1,r__2); + d__1 = emin, d__2 = z__[j4]; + emin = min(d__1,d__2); /* L10: */ } } else { - i__1 = (*n0 - 3) << (2); - for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 3] = d__ + z__[j4]; - if (z__[j4 - 3] == 0.f) { - z__[j4 - 1] = 0.f; + if (z__[j4 - 3] == 0.) 
{ + z__[j4 - 1] = 0.; d__ = z__[j4 + 2]; *dmin__ = d__; - emin = 0.f; + emin = 0.; } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 - 3] < z__[j4 + 2]) { temp = z__[j4 + 2] / z__[j4 - 3]; @@ -90918,10 +26210,10 @@ L80: z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); } - *dmin__ = dmin(*dmin__,d__); + *dmin__ = min(*dmin__,d__); /* Computing MIN */ - r__1 = emin, r__2 = z__[j4 - 1]; - emin = dmin(r__1,r__2); + d__1 = emin, d__2 = z__[j4 - 1]; + emin = min(d__1,d__2); /* L20: */ } } @@ -90930,14 +26222,14 @@ L80: *dnm2 = d__; *dmin2 = *dmin__; - j4 = ((*n0 - 2) << (2)) - *pp; - j4p2 = j4 + ((*pp) << (1)) - 1; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm2 + z__[j4p2]; - if (z__[j4 - 2] == 0.f) { - z__[j4] = 0.f; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; *dnm1 = z__[j4p2 + 2]; *dmin__ = *dnm1; - emin = 0.f; + emin = 0.; } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < z__[j4p2 + 2]) { temp = z__[j4p2 + 2] / z__[j4 - 2]; @@ -90947,17 +26239,17 @@ L80: z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); } - *dmin__ = dmin(*dmin__,*dnm1); + *dmin__ = min(*dmin__,*dnm1); *dmin1 = *dmin__; j4 += 4; - j4p2 = j4 + ((*pp) << (1)) - 1; + j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm1 + z__[j4p2]; - if (z__[j4 - 2] == 0.f) { - z__[j4] = 0.f; + if (z__[j4 - 2] == 0.) 
{ + z__[j4] = 0.; *dn = z__[j4p2 + 2]; *dmin__ = *dn; - emin = 0.f; + emin = 0.; } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < z__[j4p2 + 2]) { temp = z__[j4p2 + 2] / z__[j4 - 2]; @@ -90967,27 +26259,28 @@ L80: z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); } - *dmin__ = dmin(*dmin__,*dn); + *dmin__ = min(*dmin__,*dn); z__[j4 + 2] = *dn; - z__[((*n0) << (2)) - *pp] = emin; + z__[(*n0 << 2) - *pp] = emin; return 0; -/* End of SLASQ6 */ +/* End of DLASQ6 */ -} /* slasq6_ */ +} /* dlasq6_ */ -/* Subroutine */ int slasr_(char *side, char *pivot, char *direct, integer *m, - integer *n, real *c__, real *s, real *a, integer *lda) +/* Subroutine */ int dlasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * + lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ static integer i__, j, info; - static real temp; + static doublereal temp; extern logical lsame_(char *, char *); - static real ctemp, stemp; + static doublereal ctemp, stemp; extern /* Subroutine */ int xerbla_(char *, integer *); @@ -91001,7 +26294,7 @@ L80: Purpose ======= - SLASR performs the transformation + DLASR performs the transformation A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) @@ -91070,7 +26363,7 @@ L80: The number of columns of the matrix A. If n <= 1, an immediate return is effected. - C, S (input) REAL arrays, dimension + C, S (input) DOUBLE PRECISION arrays, dimension (M-1) if SIDE = 'L' (N-1) if SIDE = 'R' c(k) and s(k) contain the cosine and sine that define the @@ -91079,7 +26372,7 @@ L80: R( k ) = ( c( k ) s( k ) ). ( -s( k ) c( k ) ) - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) The m by n matrix A. On exit, A is overwritten by P*A if SIDE = 'R' or by A*P' if SIDE = 'L'. 
@@ -91101,13 +26394,13 @@ L80: /* Function Body */ info = 0; - if (! ((lsame_(side, "L")) || (lsame_(side, "R")))) { + if (! (lsame_(side, "L") || lsame_(side, "R"))) { info = 1; - } else if (! (((lsame_(pivot, "V")) || (lsame_( - pivot, "T"))) || (lsame_(pivot, "B")))) { + } else if (! (lsame_(pivot, "V") || lsame_(pivot, + "T") || lsame_(pivot, "B"))) { info = 2; - } else if (! ((lsame_(direct, "F")) || (lsame_( - direct, "B")))) { + } else if (! (lsame_(direct, "F") || lsame_(direct, + "B"))) { info = 3; } else if (*m < 0) { info = 4; @@ -91117,13 +26410,13 @@ L80: info = 9; } if (info != 0) { - xerbla_("SLASR ", &info); + xerbla_("DLASR ", &info); return 0; } /* Quick return if possible */ - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { return 0; } if (lsame_(side, "L")) { @@ -91136,7 +26429,7 @@ L80: for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { + if (ctemp != 1. || stemp != 0.) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[j + 1 + i__ * a_dim1]; @@ -91153,7 +26446,7 @@ L80: for (j = *m - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { + if (ctemp != 1. || stemp != 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[j + 1 + i__ * a_dim1]; @@ -91173,7 +26466,7 @@ L80: for (j = 2; j <= i__1; ++j) { ctemp = c__[j - 1]; stemp = s[j - 1]; - if ((ctemp != 1.f) || (stemp != 0.f)) { + if (ctemp != 1. || stemp != 0.) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[j + i__ * a_dim1]; @@ -91190,7 +26483,7 @@ L80: for (j = *m; j >= 2; --j) { ctemp = c__[j - 1]; stemp = s[j - 1]; - if ((ctemp != 1.f) || (stemp != 0.f)) { + if (ctemp != 1. || stemp != 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[j + i__ * a_dim1]; @@ -91210,7 +26503,7 @@ L80: for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { + if (ctemp != 1. || stemp != 0.) 
{ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[j + i__ * a_dim1]; @@ -91227,7 +26520,7 @@ L80: for (j = *m - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { + if (ctemp != 1. || stemp != 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[j + i__ * a_dim1]; @@ -91252,7 +26545,7 @@ L80: for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { + if (ctemp != 1. || stemp != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[i__ + (j + 1) * a_dim1]; @@ -91269,7 +26562,7 @@ L80: for (j = *n - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { + if (ctemp != 1. || stemp != 0.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[i__ + (j + 1) * a_dim1]; @@ -91289,7 +26582,7 @@ L80: for (j = 2; j <= i__1; ++j) { ctemp = c__[j - 1]; stemp = s[j - 1]; - if ((ctemp != 1.f) || (stemp != 0.f)) { + if (ctemp != 1. || stemp != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[i__ + j * a_dim1]; @@ -91306,7 +26599,7 @@ L80: for (j = *n; j >= 2; --j) { ctemp = c__[j - 1]; stemp = s[j - 1]; - if ((ctemp != 1.f) || (stemp != 0.f)) { + if (ctemp != 1. || stemp != 0.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[i__ + j * a_dim1]; @@ -91326,7 +26619,7 @@ L80: for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { + if (ctemp != 1. || stemp != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[i__ + j * a_dim1]; @@ -91343,7 +26636,7 @@ L80: for (j = *n - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.f) || (stemp != 0.f)) { + if (ctemp != 1. || stemp != 0.) 
{ i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[i__ + j * a_dim1]; @@ -91362,24 +26655,25 @@ L80: return 0; -/* End of SLASR */ +/* End of DLASR */ -} /* slasr_ */ +} /* dlasr_ */ -/* Subroutine */ int slasrt_(char *id, integer *n, real *d__, integer *info) +/* Subroutine */ int dlasrt_(char *id, integer *n, doublereal *d__, integer * + info) { /* System generated locals */ integer i__1, i__2; /* Local variables */ static integer i__, j; - static real d1, d2, d3; + static doublereal d1, d2, d3; static integer dir; - static real tmp; + static doublereal tmp; static integer endd; extern logical lsame_(char *, char *); static integer stack[64] /* was [2][32] */; - static real dmnmx; + static doublereal dmnmx; static integer start; extern /* Subroutine */ int xerbla_(char *, integer *); static integer stkpnt; @@ -91411,7 +26705,7 @@ L80: N (input) INTEGER The length of the array D. - D (input/output) REAL array, dimension (N) + D (input/output) DOUBLE PRECISION array, dimension (N) On entry, the array to be sorted. On exit, D has been sorted into increasing order (D(1) <= ... 
<= D(N) ) or into decreasing order @@ -91445,7 +26739,7 @@ L80: } if (*info != 0) { i__1 = -(*info); - xerbla_("SLASRT", &i__1); + xerbla_("DLASRT", &i__1); return 0; } @@ -91459,8 +26753,8 @@ L80: stack[0] = 1; stack[1] = *n; L10: - start = stack[((stkpnt) << (1)) - 2]; - endd = stack[((stkpnt) << (1)) - 1]; + start = stack[(stkpnt << 1) - 2]; + endd = stack[(stkpnt << 1) - 1]; --stkpnt; if (endd - start <= 20 && endd - start > 0) { @@ -91565,18 +26859,18 @@ L80: } if (j - start > endd - j - 1) { ++stkpnt; - stack[((stkpnt) << (1)) - 2] = start; - stack[((stkpnt) << (1)) - 1] = j; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; ++stkpnt; - stack[((stkpnt) << (1)) - 2] = j + 1; - stack[((stkpnt) << (1)) - 1] = endd; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; } else { ++stkpnt; - stack[((stkpnt) << (1)) - 2] = j + 1; - stack[((stkpnt) << (1)) - 1] = endd; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; ++stkpnt; - stack[((stkpnt) << (1)) - 2] = start; - stack[((stkpnt) << (1)) - 1] = j; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; } } else { @@ -91603,18 +26897,18 @@ L110: } if (j - start > endd - j - 1) { ++stkpnt; - stack[((stkpnt) << (1)) - 2] = start; - stack[((stkpnt) << (1)) - 1] = j; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; ++stkpnt; - stack[((stkpnt) << (1)) - 2] = j + 1; - stack[((stkpnt) << (1)) - 1] = endd; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; } else { ++stkpnt; - stack[((stkpnt) << (1)) - 2] = j + 1; - stack[((stkpnt) << (1)) - 1] = endd; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; ++stkpnt; - stack[((stkpnt) << (1)) - 2] = start; - stack[((stkpnt) << (1)) - 1] = j; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; } } } @@ -91623,20 +26917,20 @@ L110: } return 0; -/* End of SLASRT */ +/* End of DLASRT */ -} /* slasrt_ */ +} /* dlasrt_ */ -/* Subroutine */ 
int slassq_(integer *n, real *x, integer *incx, real *scale, - real *sumsq) +/* Subroutine */ int dlassq_(integer *n, doublereal *x, integer *incx, + doublereal *scale, doublereal *sumsq) { /* System generated locals */ integer i__1, i__2; - real r__1; + doublereal d__1; /* Local variables */ static integer ix; - static real absxi; + static doublereal absxi; /* @@ -91649,7 +26943,7 @@ L110: Purpose ======= - SLASSQ returns the values scl and smsq such that + DLASSQ returns the values scl and smsq such that ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, @@ -91669,7 +26963,7 @@ L110: N (input) INTEGER The number of elements to be used from the vector X. - X (input) REAL array, dimension (N) + X (input) DOUBLE PRECISION array, dimension (N) The vector for which a scaled sum of squares is computed. x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. @@ -91677,12 +26971,12 @@ L110: The increment between successive values of the vector X. INCX > 0. - SCALE (input/output) REAL + SCALE (input/output) DOUBLE PRECISION On entry, the value scale in the equation above. On exit, SCALE is overwritten with scl , the scaling factor for the sum of squares. - SUMSQ (input/output) REAL + SUMSQ (input/output) DOUBLE PRECISION On entry, the value sumsq in the equation above. On exit, SUMSQ is overwritten with smsq , the basic sum of squares from which scl has been factored out. @@ -91699,17 +26993,17 @@ L110: i__1 = (*n - 1) * *incx + 1; i__2 = *incx; for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - if (x[ix] != 0.f) { - absxi = (r__1 = x[ix], dabs(r__1)); + if (x[ix] != 0.) 
{ + absxi = (d__1 = x[ix], abs(d__1)); if (*scale < absxi) { /* Computing 2nd power */ - r__1 = *scale / absxi; - *sumsq = *sumsq * (r__1 * r__1) + 1; + d__1 = *scale / absxi; + *sumsq = *sumsq * (d__1 * d__1) + 1; *scale = absxi; } else { /* Computing 2nd power */ - r__1 = absxi / *scale; - *sumsq += r__1 * r__1; + d__1 = absxi / *scale; + *sumsq += d__1 * d__1; } } /* L10: */ @@ -91717,28 +27011,29 @@ L110: } return 0; -/* End of SLASSQ */ +/* End of DLASSQ */ -} /* slassq_ */ +} /* dlassq_ */ -/* Subroutine */ int slasv2_(real *f, real *g, real *h__, real *ssmin, real * - ssmax, real *snr, real *csr, real *snl, real *csl) +/* Subroutine */ int dlasv2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * + csr, doublereal *snl, doublereal *csl) { /* System generated locals */ - real r__1; + doublereal d__1; /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); + double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ - static real a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, clt, - crt, slt, srt; + static doublereal a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, + clt, crt, slt, srt; static integer pmax; - static real temp; + static doublereal temp; static logical swap; - static real tsign; + static doublereal tsign; + static logical gasmal; - extern doublereal slamch_(char *); /* @@ -91751,7 +27046,7 @@ L110: Purpose ======= - SLASV2 computes the singular value decomposition of a 2-by-2 + DLASV2 computes the singular value decomposition of a 2-by-2 triangular matrix [ F G ] [ 0 H ]. @@ -91765,28 +27060,28 @@ L110: Arguments ========= - F (input) REAL + F (input) DOUBLE PRECISION The (1,1) element of the 2-by-2 matrix. - G (input) REAL + G (input) DOUBLE PRECISION The (1,2) element of the 2-by-2 matrix. - H (input) REAL + H (input) DOUBLE PRECISION The (2,2) element of the 2-by-2 matrix. 
- SSMIN (output) REAL + SSMIN (output) DOUBLE PRECISION abs(SSMIN) is the smaller singular value. - SSMAX (output) REAL + SSMAX (output) DOUBLE PRECISION abs(SSMAX) is the larger singular value. - SNL (output) REAL - CSL (output) REAL + SNL (output) DOUBLE PRECISION + CSL (output) DOUBLE PRECISION The vector (CSL, SNL) is a unit left singular vector for the singular value abs(SSMAX). - SNR (output) REAL - CSR (output) REAL + SNR (output) DOUBLE PRECISION + CSR (output) DOUBLE PRECISION The vector (CSR, SNR) is a unit right singular vector for the singular value abs(SSMAX). @@ -91816,9 +27111,9 @@ L110: ft = *f; - fa = dabs(ft); + fa = abs(ft); ht = *h__; - ha = dabs(*h__); + ha = abs(*h__); /* PMAX points to the maximum absolute element of matrix @@ -91842,35 +27137,35 @@ L110: } gt = *g; - ga = dabs(gt); - if (ga == 0.f) { + ga = abs(gt); + if (ga == 0.) { /* Diagonal matrix */ *ssmin = ha; *ssmax = fa; - clt = 1.f; - crt = 1.f; - slt = 0.f; - srt = 0.f; + clt = 1.; + crt = 1.; + slt = 0.; + srt = 0.; } else { gasmal = TRUE_; if (ga > fa) { pmax = 2; - if (fa / ga < slamch_("EPS")) { + if (fa / ga < EPSILON) { /* Case of very large GA */ gasmal = FALSE_; *ssmax = ga; - if (ha > 1.f) { + if (ha > 1.) { *ssmin = fa / (ga / ha); } else { *ssmin = fa / ga * ha; } - clt = 1.f; + clt = 1.; slt = ht / gt; - srt = 1.f; + srt = 1.; crt = ft / gt; } } @@ -91883,7 +27178,7 @@ L110: /* Copes with infinite F or H */ - l = 1.f; + l = 1.; } else { l = d__ / fa; } @@ -91894,7 +27189,7 @@ L110: /* Note that abs(M) .le. 1/macheps */ - t = 2.f - l; + t = 2. - l; /* Note that T .ge. 1 */ @@ -91904,34 +27199,34 @@ L110: /* Note that 1 .le. S .le. 1 + 1/macheps */ - if (l == 0.f) { - r__ = dabs(m); + if (l == 0.) { + r__ = abs(m); } else { r__ = sqrt(l * l + mm); } /* Note that 0 .le. R .le. 1 + 1/macheps */ - a = (s + r__) * .5f; + a = (s + r__) * .5; /* Note that 1 .le. A .le. 1 + abs(M) */ *ssmin = ha / a; *ssmax = fa * a; - if (mm == 0.f) { + if (mm == 0.) 
{ /* Note that M is very tiny */ - if (l == 0.f) { - t = r_sign(&c_b9647, &ft) * r_sign(&c_b1011, >); + if (l == 0.) { + t = d_sign(&c_b2804, &ft) * d_sign(&c_b15, >); } else { - t = gt / r_sign(&d__, &ft) + m / t; + t = gt / d_sign(&d__, &ft) + m / t; } } else { - t = (m / (s + t) + m / (r__ + l)) * (a + 1.f); + t = (m / (s + t) + m / (r__ + l)) * (a + 1.); } - l = sqrt(t * t + 4.f); - crt = 2.f / l; + l = sqrt(t * t + 4.); + crt = 2. / l; srt = t / l; clt = (crt + srt * m) / a; slt = ht / ft * srt / a; @@ -91952,35 +27247,33 @@ L110: /* Correct signs of SSMAX and SSMIN */ if (pmax == 1) { - tsign = r_sign(&c_b1011, csr) * r_sign(&c_b1011, csl) * r_sign(& - c_b1011, f); + tsign = d_sign(&c_b15, csr) * d_sign(&c_b15, csl) * d_sign(&c_b15, f); } if (pmax == 2) { - tsign = r_sign(&c_b1011, snr) * r_sign(&c_b1011, csl) * r_sign(& - c_b1011, g); + tsign = d_sign(&c_b15, snr) * d_sign(&c_b15, csl) * d_sign(&c_b15, g); } if (pmax == 3) { - tsign = r_sign(&c_b1011, snr) * r_sign(&c_b1011, snl) * r_sign(& - c_b1011, h__); + tsign = d_sign(&c_b15, snr) * d_sign(&c_b15, snl) * d_sign(&c_b15, + h__); } - *ssmax = r_sign(ssmax, &tsign); - r__1 = tsign * r_sign(&c_b1011, f) * r_sign(&c_b1011, h__); - *ssmin = r_sign(ssmin, &r__1); + *ssmax = d_sign(ssmax, &tsign); + d__1 = tsign * d_sign(&c_b15, f) * d_sign(&c_b15, h__); + *ssmin = d_sign(ssmin, &d__1); return 0; -/* End of SLASV2 */ +/* End of DLASV2 */ -} /* slasv2_ */ +} /* dlasv2_ */ -/* Subroutine */ int slaswp_(integer *n, real *a, integer *lda, integer *k1, - integer *k2, integer *ipiv, integer *incx) +/* Subroutine */ int dlaswp_(integer *n, doublereal *a, integer *lda, integer + *k1, integer *k2, integer *ipiv, integer *incx) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; - static real temp; + static doublereal temp; /* @@ -91993,7 +27286,7 @@ L110: Purpose ======= - SLASWP performs a series of row 
interchanges on the matrix A. + DLASWP performs a series of row interchanges on the matrix A. One row interchange is initiated for each of rows K1 through K2 of A. Arguments @@ -92002,7 +27295,7 @@ L110: N (input) INTEGER The number of columns of the matrix A. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the matrix of column dimension N to which the row interchanges will be applied. On exit, the permuted matrix. @@ -92060,7 +27353,7 @@ L110: return 0; } - n32 = (*n / 32) << (5); + n32 = *n / 32 << 5; if (n32 != 0) { i__1 = n32; for (j = 1; j <= i__1; j += 32) { @@ -92108,28 +27401,32 @@ L110: return 0; -/* End of SLASWP */ +/* End of DLASWP */ -} /* slaswp_ */ +} /* dlaswp_ */ -/* Subroutine */ int slatrd_(char *uplo, integer *n, integer *nb, real *a, - integer *lda, real *e, real *tau, real *w, integer *ldw) +/* Subroutine */ int dlatrd_(char *uplo, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, + integer *ldw) { /* System generated locals */ integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, iw; - extern doublereal sdot_(integer *, real *, integer *, real *, integer *); - static real alpha; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal alpha; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - sgemv_(char *, integer *, integer *, real *, real *, integer *, - real *, integer *, real *, real *, integer *), saxpy_( - integer *, real *, real *, integer *, real *, integer *), ssymv_( - char *, integer *, real *, real *, integer *, real *, integer *, - real *, real *, integer *), slarfg_(integer *, real *, - real *, integer *, real *); + extern /* Subroutine */ int 
dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), daxpy_(integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *), + dsymv_(char *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), dlarfg_(integer *, doublereal *, doublereal *, integer *, + doublereal *); /* @@ -92142,17 +27439,17 @@ L110: Purpose ======= - SLATRD reduces NB rows and columns of a real symmetric matrix A to + DLATRD reduces NB rows and columns of a real symmetric matrix A to symmetric tridiagonal form by an orthogonal similarity transformation Q' * A * Q, and returns the matrices V and W which are needed to apply the transformation to the unreduced part of A. - If UPLO = 'U', SLATRD reduces the last NB rows and columns of a + If UPLO = 'U', DLATRD reduces the last NB rows and columns of a matrix, of which the upper triangle is supplied; - if UPLO = 'L', SLATRD reduces the first NB rows and columns of a + if UPLO = 'L', DLATRD reduces the first NB rows and columns of a matrix, of which the lower triangle is supplied. - This is an auxiliary routine called by SSYTRD. + This is an auxiliary routine called by DSYTRD. Arguments ========= @@ -92169,7 +27466,7 @@ L110: NB (input) INTEGER The number of rows and columns to be reduced. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading n-by-n upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower @@ -92193,18 +27490,18 @@ L110: LDA (input) INTEGER The leading dimension of the array A. LDA >= (1,N). 
- E (output) REAL array, dimension (N-1) + E (output) DOUBLE PRECISION array, dimension (N-1) If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal elements of the last NB columns of the reduced matrix; if UPLO = 'L', E(1:nb) contains the subdiagonal elements of the first NB columns of the reduced matrix. - TAU (output) REAL array, dimension (N-1) + TAU (output) DOUBLE PRECISION array, dimension (N-1) The scalar factors of the elementary reflectors, stored in TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. See Further Details. - W (output) REAL array, dimension (LDW,NB) + W (output) DOUBLE PRECISION array, dimension (LDW,NB) The n-by-nb matrix W required to update the unreduced part of A. @@ -92293,13 +27590,13 @@ L110: /* Update A(1:i,i) */ i__2 = *n - i__; - sgemv_("No transpose", &i__, &i__2, &c_b1290, &a[(i__ + 1) * + dgemv_("No transpose", &i__, &i__2, &c_b151, &a[(i__ + 1) * a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & - c_b1011, &a[i__ * a_dim1 + 1], &c__1); + c_b15, &a[i__ * a_dim1 + 1], &c__1); i__2 = *n - i__; - sgemv_("No transpose", &i__, &i__2, &c_b1290, &w[(iw + 1) * + dgemv_("No transpose", &i__, &i__2, &c_b151, &w[(iw + 1) * w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b1011, &a[i__ * a_dim1 + 1], &c__1); + c_b15, &a[i__ * a_dim1 + 1], &c__1); } if (i__ > 1) { @@ -92309,46 +27606,46 @@ L110: */ i__2 = i__ - 1; - slarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + + dlarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + 1], &c__1, &tau[i__ - 1]); e[i__ - 1] = a[i__ - 1 + i__ * a_dim1]; - a[i__ - 1 + i__ * a_dim1] = 1.f; + a[i__ - 1 + i__ * a_dim1] = 1.; /* Compute W(1:i-1,i) */ i__2 = i__ - 1; - ssymv_("Upper", &i__2, &c_b1011, &a[a_offset], lda, &a[i__ * - a_dim1 + 1], &c__1, &c_b320, &w[iw * w_dim1 + 1], & + dsymv_("Upper", &i__2, &c_b15, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b29, &w[iw * w_dim1 + 1], & c__1); if (i__ < *n) { i__2 = i__ - 1; i__3 = *n - i__; - sgemv_("Transpose", 
&i__2, &i__3, &c_b1011, &w[(iw + 1) * + dgemv_("Transpose", &i__2, &i__3, &c_b15, &w[(iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, & - c_b320, &w[i__ + 1 + iw * w_dim1], &c__1); + c_b29, &w[i__ + 1 + iw * w_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[(i__ + - 1) * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], - &c__1, &c_b1011, &w[iw * w_dim1 + 1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) + * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b15, &w[iw * w_dim1 + 1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; - sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & - c_b320, &w[i__ + 1 + iw * w_dim1], &c__1); + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & + c_b29, &w[i__ + 1 + iw * w_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &w[(iw + 1) - * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b1011, &w[iw * w_dim1 + 1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &w[(iw + 1) + * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b15, &w[iw * w_dim1 + 1], &c__1); } i__2 = i__ - 1; - sscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + dscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); i__2 = i__ - 1; - alpha = tau[i__ - 1] * -.5f * sdot_(&i__2, &w[iw * w_dim1 + 1] - , &c__1, &a[i__ * a_dim1 + 1], &c__1); + alpha = tau[i__ - 1] * -.5 * ddot_(&i__2, &w[iw * w_dim1 + 1], + &c__1, &a[i__ * a_dim1 + 1], &c__1); i__2 = i__ - 1; - saxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + daxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * w_dim1 + 1], &c__1); } @@ -92365,14 +27662,14 @@ L110: i__2 = *n - i__ + 1; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[i__ + a_dim1], - lda, &w[i__ + w_dim1], ldw, &c_b1011, &a[i__ + i__ * - 
a_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + a_dim1], + lda, &w[i__ + w_dim1], ldw, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); i__2 = *n - i__ + 1; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &w[i__ + w_dim1], - ldw, &a[i__ + a_dim1], lda, &c_b1011, &a[i__ + i__ * - a_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &w[i__ + w_dim1], + ldw, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1] + , &c__1); if (i__ < *n) { /* @@ -92383,44 +27680,44 @@ L110: i__2 = *n - i__; /* Computing MIN */ i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[i__]); e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.f; + a[i__ + 1 + i__ * a_dim1] = 1.; /* Compute W(i+1:n,i) */ i__2 = *n - i__; - ssymv_("Lower", &i__2, &c_b1011, &a[i__ + 1 + (i__ + 1) * + dsymv_("Lower", &i__2, &c_b15, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b320, &w[i__ + 1 + i__ * w_dim1], &c__1); + c_b29, &w[i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b1011, &w[i__ + 1 + - w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b320, &w[i__ * w_dim1 + 1], &c__1); + dgemv_("Transpose", &i__2, &i__3, &c_b15, &w[i__ + 1 + w_dim1] + , ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &w[ + i__ * w_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[i__ + 1 + - a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b1011, & - w[i__ + 1 + i__ * w_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b15, &w[ + i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[i__ + 1 + - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b320, &w[i__ * w_dim1 + 1], 
&c__1); + dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] + , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &w[ + i__ * w_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &w[i__ + 1 + - w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b1011, & - w[i__ + 1 + i__ * w_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b15, &w[ + i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; - sscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + dscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; - alpha = tau[i__] * -.5f * sdot_(&i__2, &w[i__ + 1 + i__ * + alpha = tau[i__] * -.5 * ddot_(&i__2, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); i__2 = *n - i__; - saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ i__ + 1 + i__ * w_dim1], &c__1); } @@ -92430,355 +27727,22 @@ L110: return 0; -/* End of SLATRD */ - -} /* slatrd_ */ - -/* Subroutine */ int slauu2_(char *uplo, integer *n, real *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - - /* Local variables */ - static integer i__; - static real aii; - extern doublereal sdot_(integer *, real *, integer *, real *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - sgemv_(char *, integer *, integer *, real *, real *, integer *, - real *, integer *, real *, real *, integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SLAUU2 computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the unblocked form of the algorithm, calling Level 2 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAUU2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (upper) { - -/* Compute the product U * U'. 
*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - aii = a[i__ + i__ * a_dim1]; - if (i__ < *n) { - i__2 = *n - i__ + 1; - a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1], - lda, &a[i__ + i__ * a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__; - sgemv_("No transpose", &i__2, &i__3, &c_b1011, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - aii, &a[i__ * a_dim1 + 1], &c__1); - } else { - sscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); - } -/* L10: */ - } - - } else { - -/* Compute the product L' * L. */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - aii = a[i__ + i__ * a_dim1]; - if (i__ < *n) { - i__2 = *n - i__ + 1; - a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1], & - c__1, &a[i__ + i__ * a_dim1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[i__ + 1 + - a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, - &a[i__ + a_dim1], lda); - } else { - sscal_(&i__, &aii, &a[i__ + a_dim1], lda); - } -/* L20: */ - } - } - - return 0; - -/* End of SLAUU2 */ - -} /* slauu2_ */ - -/* Subroutine */ int slauum_(char *uplo, integer *n, real *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__, ib, nb; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static logical upper; - extern /* Subroutine */ int strmm_(char *, char *, char *, char *, - integer *, integer *, real *, real *, integer *, real *, integer * - ), ssyrk_(char *, char *, integer - *, integer *, real *, real *, integer *, real *, real *, integer * - ), slauu2_(char *, integer *, real *, integer *, - integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer 
*, ftnlen, ftnlen); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - SLAUUM computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. - - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. - - This is the blocked form of the algorithm, calling Level 3 BLAS. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular - - N (input) INTEGER - The order of the triangular factor U or L. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! 
lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SLAUUM", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "SLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - - if ((nb <= 1) || (nb >= *n)) { - -/* Use unblocked code */ - - slauu2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute the product U * U'. */ - - i__1 = *n; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - strmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, - &c_b1011, &a[i__ + i__ * a_dim1], lda, &a[i__ * - a_dim1 + 1], lda); - slauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - sgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, & - c_b1011, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ - + (i__ + ib) * a_dim1], lda, &c_b1011, &a[i__ * - a_dim1 + 1], lda); - i__3 = *n - i__ - ib + 1; - ssyrk_("Upper", "No transpose", &ib, &i__3, &c_b1011, &a[ - i__ + (i__ + ib) * a_dim1], lda, &c_b1011, &a[i__ - + i__ * a_dim1], lda); - } -/* L10: */ - } - } else { - -/* Compute the product L' * L. */ - - i__2 = *n; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - strmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, & - c_b1011, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1] - , lda); - slauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - sgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, & - c_b1011, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ - + ib + a_dim1], lda, &c_b1011, &a[i__ + a_dim1], - lda); - i__3 = *n - i__ - ib + 1; - ssyrk_("Lower", "Transpose", &ib, &i__3, &c_b1011, &a[i__ - + ib + i__ * a_dim1], lda, &c_b1011, &a[i__ + i__ - * a_dim1], lda); - } -/* L20: */ - } - } - } - - return 0; - -/* End of SLAUUM */ +/* End of DLATRD */ -} /* slauum_ */ +} /* dlatrd_ */ -/* Subroutine */ int sorg2r_(integer *m, integer *n, integer *k, real *a, - integer *lda, real *tau, real *work, integer *info) +/* Subroutine */ int dorg2r_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; - real r__1; + doublereal d__1; /* Local variables */ static integer i__, j, l; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - slarf_(char *, integer *, integer *, real *, integer *, real *, - real *, integer *, real *), xerbla_(char *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); /* @@ -92791,13 +27755,13 @@ L110: Purpose ======= - SORG2R generates an m by n real matrix Q with orthonormal columns, + DORG2R generates an m by n real matrix Q with orthonormal columns, which is defined as the first n columns of a product of k elementary reflectors of order m Q = 
H(1) H(2) . . . H(k) - as returned by SGEQRF. + as returned by DGEQRF. Arguments ========= @@ -92812,21 +27776,21 @@ L110: The number of elementary reflectors whose product defines the matrix Q. N >= K >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by SGEQRF in the first k columns of its array + returned by DGEQRF in the first k columns of its array argument A. On exit, the m-by-n matrix Q. LDA (input) INTEGER The first dimension of the array A. LDA >= max(1,M). - TAU (input) REAL array, dimension (K) + TAU (input) DOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEQRF. + reflector H(i), as returned by DGEQRF. - WORK (workspace) REAL array, dimension (N) + WORK (workspace) DOUBLE PRECISION array, dimension (N) INFO (output) INTEGER = 0: successful exit @@ -92849,16 +27813,16 @@ L110: *info = 0; if (*m < 0) { *info = -1; - } else if ((*n < 0) || (*n > *m)) { + } else if (*n < 0 || *n > *m) { *info = -2; - } else if ((*k < 0) || (*k > *n)) { + } else if (*k < 0 || *k > *n) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } if (*info != 0) { i__1 = -(*info); - xerbla_("SORG2R", &i__1); + xerbla_("DORG2R", &i__1); return 0; } @@ -92874,10 +27838,10 @@ L110: for (j = *k + 1; j <= i__1; ++j) { i__2 = *m; for (l = 1; l <= i__2; ++l) { - a[l + j * a_dim1] = 0.f; + a[l + j * a_dim1] = 0.; /* L10: */ } - a[j + j * a_dim1] = 1.f; + a[j + j * a_dim1] = 1.; /* L20: */ } @@ -92886,37 +27850,37 @@ L110: /* Apply H(i) to A(i:m,i:n) from the left */ if (i__ < *n) { - a[i__ + i__ * a_dim1] = 1.f; + a[i__ + i__ * a_dim1] = 1.; i__1 = *m - i__ + 1; i__2 = *n - i__; - slarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + dlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ i__], 
&a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); } if (i__ < *m) { i__1 = *m - i__; - r__1 = -tau[i__]; - sscal_(&i__1, &r__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + d__1 = -tau[i__]; + dscal_(&i__1, &d__1, &a[i__ + 1 + i__ * a_dim1], &c__1); } - a[i__ + i__ * a_dim1] = 1.f - tau[i__]; + a[i__ + i__ * a_dim1] = 1. - tau[i__]; /* Set A(1:i-1,i) to zero */ i__1 = i__ - 1; for (l = 1; l <= i__1; ++l) { - a[l + i__ * a_dim1] = 0.f; + a[l + i__ * a_dim1] = 0.; /* L30: */ } /* L40: */ } return 0; -/* End of SORG2R */ +/* End of DORG2R */ -} /* sorg2r_ */ +} /* dorg2r_ */ -/* Subroutine */ int sorgbr_(char *vect, integer *m, integer *n, integer *k, - real *a, integer *lda, real *tau, real *work, integer *lwork, integer - *info) +/* Subroutine */ int dorgbr_(char *vect, integer *m, integer *n, integer *k, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; @@ -92929,10 +27893,10 @@ L110: extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *, integer *), sorgqr_( - integer *, integer *, integer *, real *, integer *, real *, real * - , integer *, integer *); + extern /* Subroutine */ int dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); static integer lwkopt; static logical lquery; @@ -92947,23 +27911,23 @@ L110: Purpose ======= - SORGBR generates one of the real orthogonal matrices Q or P**T - determined by SGEBRD when reducing a real matrix A to bidiagonal + DORGBR generates one of the real orthogonal matrices Q or P**T + determined by DGEBRD 
when reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and P**T are defined as products of elementary reflectors H(i) or G(i) respectively. If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q is of order M: - if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n + if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n columns of Q, where m >= n >= k; - if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an + if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an M-by-M matrix. If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T is of order N: - if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m + if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m rows of P**T, where n >= m >= k; - if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as + if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as an N-by-N matrix. Arguments @@ -92971,7 +27935,7 @@ L110: VECT (input) CHARACTER*1 Specifies whether the matrix Q or the matrix P**T is - required, as defined in the transformation applied by SGEBRD: + required, as defined in the transformation applied by DGEBRD: = 'Q': generate Q; = 'P': generate P**T. @@ -92987,27 +27951,27 @@ L110: K (input) INTEGER If VECT = 'Q', the number of columns in the original M-by-K - matrix reduced by SGEBRD. + matrix reduced by DGEBRD. If VECT = 'P', the number of rows in the original K-by-N - matrix reduced by SGEBRD. + matrix reduced by DGEBRD. K >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the vectors which define the elementary reflectors, - as returned by SGEBRD. + as returned by DGEBRD. On exit, the M-by-N matrix Q or P**T. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). 
- TAU (input) REAL array, dimension + TAU (input) DOUBLE PRECISION array, dimension (min(M,K)) if VECT = 'Q' (min(N,K)) if VECT = 'P' TAU(i) must contain the scalar factor of the elementary reflector H(i) or G(i), which determines Q or P**T, as - returned by SGEBRD in its array argument TAUQ or TAUP. + returned by DGEBRD in its array argument TAUQ or TAUP. - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -93046,8 +28010,8 @@ L110: *info = -1; } else if (*m < 0) { *info = -2; - } else if (((*n < 0) || (wantq && ((*n > *m) || (*n < min(*m,*k))))) || (! - wantq && ((*m > *n) || (*m < min(*n,*k))))) { + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { *info = -3; } else if (*k < 0) { *info = -4; @@ -93059,19 +28023,19 @@ L110: if (*info == 0) { if (wantq) { - nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( + nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( ftnlen)1); } else { - nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( + nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( ftnlen)1); } lwkopt = max(1,mn) * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("SORGBR", &i__1); + xerbla_("DORGBR", &i__1); return 0; } else if (lquery) { return 0; @@ -93079,15 +28043,15 @@ L110: /* Quick return if possible */ - if ((*m == 0) || (*n == 0)) { - work[1] = 1.f; + if (*m == 0 || *n == 0) { + work[1] = 1.; return 0; } if (wantq) { /* - Form Q, determined by a call to SGEBRD to reduce an m-by-k + Form Q, determined by a call to DGEBRD to reduce an m-by-k matrix */ @@ -93095,7 +28059,7 @@ L110: /* If m >= k, assume m >= n >= k */ - sorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + dorgqr_(m, n, k, &a[a_offset], lda, &tau[1], 
&work[1], lwork, & iinfo); } else { @@ -93109,7 +28073,7 @@ L110: */ for (j = *m; j >= 2; --j) { - a[j * a_dim1 + 1] = 0.f; + a[j * a_dim1 + 1] = 0.; i__1 = *m; for (i__ = j + 1; i__ <= i__1; ++i__) { a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; @@ -93117,10 +28081,10 @@ L110: } /* L20: */ } - a[a_dim1 + 1] = 1.f; + a[a_dim1 + 1] = 1.; i__1 = *m; for (i__ = 2; i__ <= i__1; ++i__) { - a[i__ + a_dim1] = 0.f; + a[i__ + a_dim1] = 0.; /* L30: */ } if (*m > 1) { @@ -93130,14 +28094,14 @@ L110: i__1 = *m - 1; i__2 = *m - 1; i__3 = *m - 1; - sorgqr_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, & - tau[1], &work[1], lwork, &iinfo); + dorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); } } } else { /* - Form P', determined by a call to SGEBRD to reduce a k-by-n + Form P', determined by a call to DGEBRD to reduce a k-by-n matrix */ @@ -93145,7 +28109,7 @@ L110: /* If k < n, assume k <= m <= n */ - sorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + dorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & iinfo); } else { @@ -93158,10 +28122,10 @@ L110: those of the unit matrix */ - a[a_dim1 + 1] = 1.f; + a[a_dim1 + 1] = 1.; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { - a[i__ + a_dim1] = 0.f; + a[i__ + a_dim1] = 0.; /* L40: */ } i__1 = *n; @@ -93170,7 +28134,7 @@ L110: a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1]; /* L50: */ } - a[j * a_dim1 + 1] = 0.f; + a[j * a_dim1 + 1] = 0.; /* L60: */ } if (*n > 1) { @@ -93180,20 +28144,21 @@ L110: i__1 = *n - 1; i__2 = *n - 1; i__3 = *n - 1; - sorglq_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, & - tau[1], &work[1], lwork, &iinfo); + dorglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); } } } - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; return 0; -/* End of SORGBR */ +/* End of DORGBR */ -} /* sorgbr_ */ +} /* dorgbr_ */ -/* Subroutine */ int sorghr_(integer *n, integer *ilo, integer *ihi, real 
*a, - integer *lda, real *tau, real *work, integer *lwork, integer *info) +/* Subroutine */ int dorghr_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; @@ -93203,8 +28168,9 @@ L110: extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *, integer *); + extern /* Subroutine */ int dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); static integer lwkopt; static logical lquery; @@ -93219,9 +28185,9 @@ L110: Purpose ======= - SORGHR generates a real orthogonal matrix Q which is defined as the + DORGHR generates a real orthogonal matrix Q which is defined as the product of IHI-ILO elementary reflectors of order N, as returned by - SGEHRD: + DGEHRD: Q = H(ilo) H(ilo+1) . . . H(ihi-1). @@ -93234,23 +28200,23 @@ L110: ILO (input) INTEGER IHI (input) INTEGER ILO and IHI must have the same values as in the previous call - of SGEHRD. Q is equal to the unit matrix except in the + of DGEHRD. Q is equal to the unit matrix except in the submatrix Q(ilo+1:ihi,ilo+1:ihi). 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the vectors which define the elementary reflectors, - as returned by SGEHRD. + as returned by DGEHRD. On exit, the N-by-N orthogonal matrix Q. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). 
- TAU (input) REAL array, dimension (N-1) + TAU (input) DOUBLE PRECISION array, dimension (N-1) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEHRD. + reflector H(i), as returned by DGEHRD. - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -93286,9 +28252,9 @@ L110: lquery = *lwork == -1; if (*n < 0) { *info = -1; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { + } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -2; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; @@ -93297,15 +28263,15 @@ L110: } if (*info == 0) { - nb = ilaenv_(&c__1, "SORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( + nb = ilaenv_(&c__1, "DORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( ftnlen)1); lwkopt = max(1,nh) * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("SORGHR", &i__1); + xerbla_("DORGHR", &i__1); return 0; } else if (lquery) { return 0; @@ -93314,7 +28280,7 @@ L110: /* Quick return if possible */ if (*n == 0) { - work[1] = 1.f; + work[1] = 1.; return 0; } @@ -93328,7 +28294,7 @@ L110: for (j = *ihi; j >= i__1; --j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.f; + a[i__ + j * a_dim1] = 0.; /* L10: */ } i__2 = *ihi; @@ -93338,7 +28304,7 @@ L110: } i__2 = *n; for (i__ = *ihi + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.f; + a[i__ + j * a_dim1] = 0.; /* L30: */ } /* L40: */ @@ -93347,20 +28313,20 @@ L110: for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.f; + a[i__ + j * a_dim1] = 0.; /* L50: */ } - a[j + j * a_dim1] = 1.f; + a[j + j * a_dim1] = 1.; /* L60: */ } i__1 = *n; for (j = *ihi + 1; j <= i__1; ++j) { i__2 = *n; for 
(i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.f; + a[i__ + j * a_dim1] = 0.; /* L70: */ } - a[j + j * a_dim1] = 1.f; + a[j + j * a_dim1] = 1.; /* L80: */ } @@ -93368,28 +28334,28 @@ L110: /* Generate Q(ilo+1:ihi,ilo+1:ihi) */ - sorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* + dorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* ilo], &work[1], lwork, &iinfo); } - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; return 0; -/* End of SORGHR */ +/* End of DORGHR */ -} /* sorghr_ */ +} /* dorghr_ */ -/* Subroutine */ int sorgl2_(integer *m, integer *n, integer *k, real *a, - integer *lda, real *tau, real *work, integer *info) +/* Subroutine */ int dorgl2_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; - real r__1; + doublereal d__1; /* Local variables */ static integer i__, j, l; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - slarf_(char *, integer *, integer *, real *, integer *, real *, - real *, integer *, real *), xerbla_(char *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); /* @@ -93402,13 +28368,13 @@ L110: Purpose ======= - SORGL2 generates an m by n real matrix Q with orthonormal rows, + DORGL2 generates an m by n real matrix Q with orthonormal rows, which is defined as the first m rows of a product of k elementary reflectors of order n Q = H(k) . . . H(2) H(1) - as returned by SGELQF. + as returned by DGELQF. Arguments ========= @@ -93423,20 +28389,20 @@ L110: The number of elementary reflectors whose product defines the matrix Q. M >= K >= 0. 
- A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned - by SGELQF in the first k rows of its array argument A. + by DGELQF in the first k rows of its array argument A. On exit, the m-by-n matrix Q. LDA (input) INTEGER The first dimension of the array A. LDA >= max(1,M). - TAU (input) REAL array, dimension (K) + TAU (input) DOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGELQF. + reflector H(i), as returned by DGELQF. - WORK (workspace) REAL array, dimension (M) + WORK (workspace) DOUBLE PRECISION array, dimension (M) INFO (output) INTEGER = 0: successful exit @@ -93461,14 +28427,14 @@ L110: *info = -1; } else if (*n < *m) { *info = -2; - } else if ((*k < 0) || (*k > *m)) { + } else if (*k < 0 || *k > *m) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } if (*info != 0) { i__1 = -(*info); - xerbla_("SORGL2", &i__1); + xerbla_("DORGL2", &i__1); return 0; } @@ -93486,11 +28452,11 @@ L110: for (j = 1; j <= i__1; ++j) { i__2 = *m; for (l = *k + 1; l <= i__2; ++l) { - a[l + j * a_dim1] = 0.f; + a[l + j * a_dim1] = 0.; /* L10: */ } if (j > *k && j <= *m) { - a[j + j * a_dim1] = 1.f; + a[j + j * a_dim1] = 1.; } /* L20: */ } @@ -93502,49 +28468,50 @@ L110: if (i__ < *n) { if (i__ < *m) { - a[i__ + i__ * a_dim1] = 1.f; + a[i__ + i__ * a_dim1] = 1.; i__1 = *m - i__; i__2 = *n - i__ + 1; - slarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + dlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); } i__1 = *n - i__; - r__1 = -tau[i__]; - sscal_(&i__1, &r__1, &a[i__ + (i__ + 1) * a_dim1], lda); + d__1 = -tau[i__]; + dscal_(&i__1, &d__1, &a[i__ + (i__ + 1) * a_dim1], lda); } - a[i__ + i__ * a_dim1] = 1.f - tau[i__]; + a[i__ + i__ * a_dim1] = 1. 
- tau[i__]; /* Set A(i,1:i-1) to zero */ i__1 = i__ - 1; for (l = 1; l <= i__1; ++l) { - a[i__ + l * a_dim1] = 0.f; + a[i__ + l * a_dim1] = 0.; /* L30: */ } /* L40: */ } return 0; -/* End of SORGL2 */ +/* End of DORGL2 */ -} /* sorgl2_ */ +} /* dorgl2_ */ -/* Subroutine */ int sorglq_(integer *m, integer *n, integer *k, real *a, - integer *lda, real *tau, real *work, integer *lwork, integer *info) +/* Subroutine */ int dorglq_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int sorgl2_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *), slarfb_(char *, char *, - char *, char *, integer *, integer *, integer *, real *, integer * - , real *, integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int dorgl2_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); static integer ldwork, lwkopt; static logical lquery; @@ -93559,13 +28526,13 @@ L110: Purpose ======= - SORGLQ generates an M-by-N real matrix Q with orthonormal rows, + DORGLQ generates an M-by-N real matrix Q with orthonormal rows, which is defined as the first M rows 
of a product of K elementary reflectors of order N Q = H(k) . . . H(2) H(1) - as returned by SGELQF. + as returned by DGELQF. Arguments ========= @@ -93580,20 +28547,20 @@ L110: The number of elementary reflectors whose product defines the matrix Q. M >= K >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned - by SGELQF in the first k rows of its array argument A. + by DGELQF in the first k rows of its array argument A. On exit, the M-by-N matrix Q. LDA (input) INTEGER The first dimension of the array A. LDA >= max(1,M). - TAU (input) REAL array, dimension (K) + TAU (input) DOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGELQF. + reflector H(i), as returned by DGELQF. - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -93625,15 +28592,15 @@ L110: /* Function Body */ *info = 0; - nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); lwkopt = max(1,*m) * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < *m) { *info = -2; - } else if ((*k < 0) || (*k > *m)) { + } else if (*k < 0 || *k > *m) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; @@ -93642,7 +28609,7 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("SORGLQ", &i__1); + xerbla_("DORGLQ", &i__1); return 0; } else if (lquery) { return 0; @@ -93651,7 +28618,7 @@ L110: /* Quick return if possible */ if (*m <= 0) { - work[1] = 1.f; + work[1] = 1.; return 0; } @@ -93665,7 +28632,7 @@ L110: Computing MAX */ - i__1 = 0, i__2 = ilaenv_(&c__3, "SORGLQ", " ", m, n, k, &c_n1, ( + i__1 = 0, i__2 = ilaenv_(&c__3, "DORGLQ", " ", m, n, k, &c_n1, ( ftnlen)6, (ftnlen)1); nx = max(i__1,i__2); if (nx < *k) { @@ -93683,7 +28650,7 @@ L110: nb = *lwork / ldwork; /* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "SORGLQ", " ", m, n, k, &c_n1, + i__1 = 2, i__2 = ilaenv_(&c__2, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); nbmin = max(i__1,i__2); } @@ -93708,7 +28675,7 @@ L110: for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = kk + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.f; + a[i__ + j * a_dim1] = 0.; /* L10: */ } /* L20: */ @@ -93723,7 +28690,7 @@ L110: i__1 = *m - kk; i__2 = *n - kk; i__3 = *k - kk; - sorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + dorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & tau[kk + 1], &work[1], &iinfo); } @@ -93744,14 +28711,14 @@ L110: */ i__2 = *n - i__ + 1; - slarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * + dlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H' to A(i+ib:m,i:n) 
from the right */ i__2 = *m - i__ - ib + 1; i__3 = *n - i__ + 1; - slarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & + dlarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + 1], &ldwork); @@ -93760,7 +28727,7 @@ L110: /* Apply H' to columns i:n of current block */ i__2 = *n - i__ + 1; - sorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + dorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & work[1], &iinfo); /* Set columns 1:i-1 of current block to zero */ @@ -93769,7 +28736,7 @@ L110: for (j = 1; j <= i__2; ++j) { i__3 = i__ + ib - 1; for (l = i__; l <= i__3; ++l) { - a[l + j * a_dim1] = 0.f; + a[l + j * a_dim1] = 0.; /* L30: */ } /* L40: */ @@ -93778,29 +28745,30 @@ L110: } } - work[1] = (real) iws; + work[1] = (doublereal) iws; return 0; -/* End of SORGLQ */ +/* End of DORGLQ */ -} /* sorglq_ */ +} /* dorglq_ */ -/* Subroutine */ int sorgqr_(integer *m, integer *n, integer *k, real *a, - integer *lda, real *tau, real *work, integer *lwork, integer *info) +/* Subroutine */ int dorgqr_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int sorg2r_(integer *, integer *, integer *, real - *, integer *, real *, real *, integer *), slarfb_(char *, char *, - char *, char *, integer *, integer *, integer *, real *, integer * - , real *, integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int dorg2r_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, 
doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); static integer ldwork, lwkopt; static logical lquery; @@ -93815,13 +28783,13 @@ L110: Purpose ======= - SORGQR generates an M-by-N real matrix Q with orthonormal columns, + DORGQR generates an M-by-N real matrix Q with orthonormal columns, which is defined as the first N columns of a product of K elementary reflectors of order M Q = H(1) H(2) . . . H(k) - as returned by SGEQRF. + as returned by DGEQRF. Arguments ========= @@ -93836,21 +28804,21 @@ L110: The number of elementary reflectors whose product defines the matrix Q. N >= K >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by SGEQRF in the first k columns of its array + returned by DGEQRF in the first k columns of its array argument A. On exit, the M-by-N matrix Q. LDA (input) INTEGER The first dimension of the array A. LDA >= max(1,M). - TAU (input) REAL array, dimension (K) + TAU (input) DOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEQRF. + reflector H(i), as returned by DGEQRF. - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -93882,15 +28850,15 @@ L110: /* Function Body */ *info = 0; - nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); lwkopt = max(1,*n) * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; - } else if ((*n < 0) || (*n > *m)) { + } else if (*n < 0 || *n > *m) { *info = -2; - } else if ((*k < 0) || (*k > *n)) { + } else if (*k < 0 || *k > *n) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; @@ -93899,7 +28867,7 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("SORGQR", &i__1); + xerbla_("DORGQR", &i__1); return 0; } else if (lquery) { return 0; @@ -93908,7 +28876,7 @@ L110: /* Quick return if possible */ if (*n <= 0) { - work[1] = 1.f; + work[1] = 1.; return 0; } @@ -93922,7 +28890,7 @@ L110: Computing MAX */ - i__1 = 0, i__2 = ilaenv_(&c__3, "SORGQR", " ", m, n, k, &c_n1, ( + i__1 = 0, i__2 = ilaenv_(&c__3, "DORGQR", " ", m, n, k, &c_n1, ( ftnlen)6, (ftnlen)1); nx = max(i__1,i__2); if (nx < *k) { @@ -93940,7 +28908,7 @@ L110: nb = *lwork / ldwork; /* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "SORGQR", " ", m, n, k, &c_n1, + i__1 = 2, i__2 = ilaenv_(&c__2, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); nbmin = max(i__1,i__2); } @@ -93965,7 +28933,7 @@ L110: for (j = kk + 1; j <= i__1; ++j) { i__2 = kk; for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.f; + a[i__ + j * a_dim1] = 0.; /* L10: */ } /* L20: */ @@ -93980,7 +28948,7 @@ L110: i__1 = *m - kk; i__2 = *n - kk; i__3 = *k - kk; - sorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + dorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & tau[kk + 1], &work[1], &iinfo); } @@ -94001,14 +28969,14 @@ L110: */ i__2 = *m - i__ + 1; - slarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * + dlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * a_dim1], lda, 
&tau[i__], &work[1], &ldwork); /* Apply H to A(i:m,i+ib:n) from the left */ i__2 = *m - i__ + 1; i__3 = *n - i__ - ib + 1; - slarfb_("Left", "No transpose", "Forward", "Columnwise", & + dlarfb_("Left", "No transpose", "Forward", "Columnwise", & i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & work[ib + 1], &ldwork); @@ -94017,7 +28985,7 @@ L110: /* Apply H to rows i:m of current block */ i__2 = *m - i__ + 1; - sorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + dorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & work[1], &iinfo); /* Set rows 1:i-1 of current block to zero */ @@ -94026,7 +28994,7 @@ L110: for (j = i__; j <= i__2; ++j) { i__3 = i__ - 1; for (l = 1; l <= i__3; ++l) { - a[l + j * a_dim1] = 0.f; + a[l + j * a_dim1] = 0.; /* L30: */ } /* L40: */ @@ -94035,28 +29003,29 @@ L110: } } - work[1] = (real) iws; + work[1] = (doublereal) iws; return 0; -/* End of SORGQR */ +/* End of DORGQR */ -} /* sorgqr_ */ +} /* dorgqr_ */ -/* Subroutine */ int sorm2l_(char *side, char *trans, integer *m, integer *n, - integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *info) +/* Subroutine */ int dorm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ static integer i__, i1, i2, i3, mi, ni, nq; - static real aii; + static doublereal aii; static logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *); + extern /* 
Subroutine */ int xerbla_(char *, integer *); static logical notran; @@ -94070,7 +29039,7 @@ L110: Purpose ======= - SORM2L overwrites the general real m by n matrix C with + DORM2L overwrites the general real m by n matrix C with Q * C if SIDE = 'L' and TRANS = 'N', or @@ -94085,7 +29054,7 @@ L110: Q = H(k) . . . H(2) H(1) - as returned by SGEQLF. Q is of order m if SIDE = 'L' and of order n + as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n if SIDE = 'R'. Arguments @@ -94111,10 +29080,10 @@ L110: If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. - A (input) REAL array, dimension (LDA,K) + A (input) DOUBLE PRECISION array, dimension (LDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by - SGEQLF in the last k columns of its array argument A. + DGEQLF in the last k columns of its array argument A. A is modified by the routine but restored on exit. LDA (input) INTEGER @@ -94122,18 +29091,18 @@ L110: If SIDE = 'L', LDA >= max(1,M); if SIDE = 'R', LDA >= max(1,N). - TAU (input) REAL array, dimension (K) + TAU (input) DOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEQLF. + reflector H(i), as returned by DGEQLF. - C (input/output) REAL array, dimension (LDC,N) + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) On entry, the m by n matrix C. On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). 
- WORK (workspace) REAL array, dimension + WORK (workspace) DOUBLE PRECISION array, dimension (N) if SIDE = 'L', (M) if SIDE = 'R' @@ -94177,7 +29146,7 @@ L110: *info = -3; } else if (*n < 0) { *info = -4; - } else if ((*k < 0) || (*k > nq)) { + } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; @@ -94186,17 +29155,17 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("SORM2L", &i__1); + xerbla_("DORM2L", &i__1); return 0; } /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (*k == 0)) { + if (*m == 0 || *n == 0 || *k == 0) { return 0; } - if ((left && notran) || (! left && ! notran)) { + if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = 1; @@ -94230,33 +29199,34 @@ L110: /* Apply H(i) */ aii = a[nq - *k + i__ + i__ * a_dim1]; - a[nq - *k + i__ + i__ * a_dim1] = 1.f; - slarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ + a[nq - *k + i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ c_offset], ldc, &work[1]); a[nq - *k + i__ + i__ * a_dim1] = aii; /* L10: */ } return 0; -/* End of SORM2L */ +/* End of DORM2L */ -} /* sorm2l_ */ +} /* dorm2l_ */ -/* Subroutine */ int sorm2r_(char *side, char *trans, integer *m, integer *n, - integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *info) +/* Subroutine */ int dorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static real aii; + static doublereal aii; static logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); extern logical lsame_(char *, char *); 
- extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); static logical notran; @@ -94270,7 +29240,7 @@ L110: Purpose ======= - SORM2R overwrites the general real m by n matrix C with + DORM2R overwrites the general real m by n matrix C with Q * C if SIDE = 'L' and TRANS = 'N', or @@ -94285,7 +29255,7 @@ L110: Q = H(1) H(2) . . . H(k) - as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n + as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n if SIDE = 'R'. Arguments @@ -94311,10 +29281,10 @@ L110: If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. - A (input) REAL array, dimension (LDA,K) + A (input) DOUBLE PRECISION array, dimension (LDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by - SGEQRF in the first k columns of its array argument A. + DGEQRF in the first k columns of its array argument A. A is modified by the routine but restored on exit. LDA (input) INTEGER @@ -94322,18 +29292,18 @@ L110: If SIDE = 'L', LDA >= max(1,M); if SIDE = 'R', LDA >= max(1,N). - TAU (input) REAL array, dimension (K) + TAU (input) DOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEQRF. + reflector H(i), as returned by DGEQRF. - C (input/output) REAL array, dimension (LDC,N) + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) On entry, the m by n matrix C. On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). 
- WORK (workspace) REAL array, dimension + WORK (workspace) DOUBLE PRECISION array, dimension (N) if SIDE = 'L', (M) if SIDE = 'R' @@ -94377,7 +29347,7 @@ L110: *info = -3; } else if (*n < 0) { *info = -4; - } else if ((*k < 0) || (*k > nq)) { + } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; @@ -94386,17 +29356,17 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("SORM2R", &i__1); + xerbla_("DORM2R", &i__1); return 0; } /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (*k == 0)) { + if (*m == 0 || *n == 0 || *k == 0) { return 0; } - if ((left && ! notran) || (! left && notran)) { + if (left && ! notran || ! left && notran) { i1 = 1; i2 = *k; i3 = 1; @@ -94434,21 +29404,22 @@ L110: /* Apply H(i) */ aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.f; - slarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ + a[i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ ic + jc * c_dim1], ldc, &work[1]); a[i__ + i__ * a_dim1] = aii; /* L10: */ } return 0; -/* End of SORM2R */ +/* End of DORM2R */ -} /* sorm2r_ */ +} /* dorm2r_ */ -/* Subroutine */ int sormbr_(char *vect, char *side, char *trans, integer *m, - integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, - integer *ldc, real *work, integer *lwork, integer *info) +/* Subroutine */ int dormbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, + doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info) { /* System generated locals */ address a__1[2]; @@ -94466,16 +29437,17 @@ L110: extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - static logical notran, applyq; + extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, 
integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static logical notran; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static logical applyq; static char transt[1]; - extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *, integer *); static integer lwkopt; static logical lquery; - extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *, integer *); /* @@ -94488,19 +29460,19 @@ L110: Purpose ======= - If VECT = 'Q', SORMBR overwrites the general real M-by-N matrix C + If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q TRANS = 'T': Q**T * C C * Q**T - If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C + If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': P * C C * P TRANS = 'T': P**T * C C * P**T - Here Q and P**T are the orthogonal matrices determined by SGEBRD when + Here Q and P**T are the orthogonal matrices determined by DGEBRD when reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and P**T are defined as products of elementary reflectors H(i) and G(i) respectively. @@ -94539,29 +29511,29 @@ L110: K (input) INTEGER If VECT = 'Q', the number of columns in the original - matrix reduced by SGEBRD. + matrix reduced by DGEBRD. If VECT = 'P', the number of rows in the original - matrix reduced by SGEBRD. + matrix reduced by DGEBRD. K >= 0. 
- A (input) REAL array, dimension + A (input) DOUBLE PRECISION array, dimension (LDA,min(nq,K)) if VECT = 'Q' (LDA,nq) if VECT = 'P' The vectors which define the elementary reflectors H(i) and G(i), whose products determine the matrices Q and P, as - returned by SGEBRD. + returned by DGEBRD. LDA (input) INTEGER The leading dimension of the array A. If VECT = 'Q', LDA >= max(1,nq); if VECT = 'P', LDA >= max(1,min(nq,K)). - TAU (input) REAL array, dimension (min(nq,K)) + TAU (input) DOUBLE PRECISION array, dimension (min(nq,K)) TAU(i) must contain the scalar factor of the elementary reflector H(i) or G(i) which determines Q or P, as returned - by SGEBRD in the array argument TAUQ or TAUP. + by DGEBRD in the array argument TAUQ or TAUP. - C (input/output) REAL array, dimension (LDC,N) + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q or P*C or P**T*C or C*P or C*P**T. @@ -94569,7 +29541,7 @@ L110: LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -94636,8 +29608,7 @@ L110: } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = min(nq,*k); - if ((applyq && *lda < max(1,nq)) || (! applyq && *lda < max(i__1,i__2) - )) { + if (applyq && *lda < max(1,nq) || ! 
applyq && *lda < max(i__1,i__2)) { *info = -8; } else if (*ldc < max(1,*m)) { *info = -11; @@ -94655,7 +29626,7 @@ L110: s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *m - 1; i__2 = *m - 1; - nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( + nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( ftnlen)6, (ftnlen)2); } else { /* Writing concatenation */ @@ -94664,7 +29635,7 @@ L110: s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *n - 1; i__2 = *n - 1; - nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( + nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( ftnlen)6, (ftnlen)2); } } else { @@ -94675,7 +29646,7 @@ L110: s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *m - 1; i__2 = *m - 1; - nb = ilaenv_(&c__1, "SORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( + nb = ilaenv_(&c__1, "DORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( ftnlen)6, (ftnlen)2); } else { /* Writing concatenation */ @@ -94684,17 +29655,17 @@ L110: s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *n - 1; i__2 = *n - 1; - nb = ilaenv_(&c__1, "SORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( + nb = ilaenv_(&c__1, "DORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( ftnlen)6, (ftnlen)2); } } lwkopt = max(1,nw) * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("SORMBR", &i__1); + xerbla_("DORMBR", &i__1); return 0; } else if (lquery) { return 0; @@ -94702,8 +29673,8 @@ L110: /* Quick return if possible */ - work[1] = 1.f; - if ((*m == 0) || (*n == 0)) { + work[1] = 1.; + if (*m == 0 || *n == 0) { return 0; } @@ -94713,13 +29684,13 @@ L110: if (nq >= *k) { -/* Q was determined by a call to SGEBRD with nq >= k */ +/* Q was determined by a call to DGEBRD with nq >= k */ - sormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + dormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], lwork, &iinfo); } else if (nq > 1) { -/* Q was determined by a call to SGEBRD with nq < 
k */ +/* Q was determined by a call to DGEBRD with nq < k */ if (left) { mi = *m - 1; @@ -94733,7 +29704,7 @@ L110: i2 = 2; } i__1 = nq - 1; - sormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] + dormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); } } else { @@ -94747,13 +29718,13 @@ L110: } if (nq > *k) { -/* P was determined by a call to SGEBRD with nq > k */ +/* P was determined by a call to DGEBRD with nq > k */ - sormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + dormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], lwork, &iinfo); } else if (nq > 1) { -/* P was determined by a call to SGEBRD with nq <= k */ +/* P was determined by a call to DGEBRD with nq <= k */ if (left) { mi = *m - 1; @@ -94767,33 +29738,34 @@ L110: i2 = 2; } i__1 = nq - 1; - sormlq_(side, transt, &mi, &ni, &i__1, &a[((a_dim1) << (1)) + 1], - lda, &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], - lwork, &iinfo); + dormlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo); } } - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; return 0; -/* End of SORMBR */ +/* End of DORMBR */ -} /* sormbr_ */ +} /* dormbr_ */ -/* Subroutine */ int sorml2_(char *side, char *trans, integer *m, integer *n, - integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *info) +/* Subroutine */ int dorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static real aii; + static doublereal aii; static logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, 
integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, - integer *, real *, real *, integer *, real *), xerbla_( - char *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); static logical notran; @@ -94807,7 +29779,7 @@ L110: Purpose ======= - SORML2 overwrites the general real m by n matrix C with + DORML2 overwrites the general real m by n matrix C with Q * C if SIDE = 'L' and TRANS = 'N', or @@ -94822,7 +29794,7 @@ L110: Q = H(k) . . . H(2) H(1) - as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n + as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n if SIDE = 'R'. Arguments @@ -94848,29 +29820,29 @@ L110: If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. - A (input) REAL array, dimension + A (input) DOUBLE PRECISION array, dimension (LDA,M) if SIDE = 'L', (LDA,N) if SIDE = 'R' The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by - SGELQF in the first k rows of its array argument A. + DGELQF in the first k rows of its array argument A. A is modified by the routine but restored on exit. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,K). - TAU (input) REAL array, dimension (K) + TAU (input) DOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGELQF. + reflector H(i), as returned by DGELQF. - C (input/output) REAL array, dimension (LDC,N) + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) On entry, the m by n matrix C. On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). 
- WORK (workspace) REAL array, dimension + WORK (workspace) DOUBLE PRECISION array, dimension (N) if SIDE = 'L', (M) if SIDE = 'R' @@ -94914,7 +29886,7 @@ L110: *info = -3; } else if (*n < 0) { *info = -4; - } else if ((*k < 0) || (*k > nq)) { + } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,*k)) { *info = -7; @@ -94923,17 +29895,17 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("SORML2", &i__1); + xerbla_("DORML2", &i__1); return 0; } /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (*k == 0)) { + if (*m == 0 || *n == 0 || *k == 0) { return 0; } - if ((left && notran) || (! left && ! notran)) { + if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = 1; @@ -94971,21 +29943,21 @@ L110: /* Apply H(i) */ aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.f; - slarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ + a[i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ ic + jc * c_dim1], ldc, &work[1]); a[i__ + i__ * a_dim1] = aii; /* L10: */ } return 0; -/* End of SORML2 */ +/* End of DORML2 */ -} /* sorml2_ */ +} /* dorml2_ */ -/* Subroutine */ int sormlq_(char *side, char *trans, integer *m, integer *n, - integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *lwork, integer *info) +/* Subroutine */ int dormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; @@ -94998,20 +29970,20 @@ L110: /* Local variables */ static integer i__; - static real t[4160] /* was [65][64] */; + static doublereal t[4160] /* was [65][64] */; static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; static logical left; extern logical lsame_(char *, char *); static integer nbmin, iinfo; - extern /* Subroutine */ int sorml2_(char *, char 
*, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *), slarfb_(char *, char *, char *, char * - , integer *, integer *, integer *, real *, integer *, real *, - integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int dorml2_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); static logical notran; static integer ldwork; static char transt[1]; @@ -95029,7 +30001,7 @@ L110: Purpose ======= - SORMLQ overwrites the general real M-by-N matrix C with + DORMLQ overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q @@ -95040,7 +30012,7 @@ L110: Q = H(k) . . . H(2) H(1) - as returned by SGELQF. Q is of order M if SIDE = 'L' and of order N + as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. Arguments @@ -95066,29 +30038,29 @@ L110: If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. - A (input) REAL array, dimension + A (input) DOUBLE PRECISION array, dimension (LDA,M) if SIDE = 'L', (LDA,N) if SIDE = 'R' The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by - SGELQF in the first k rows of its array argument A. + DGELQF in the first k rows of its array argument A. 
A is modified by the routine but restored on exit. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,K). - TAU (input) REAL array, dimension (K) + TAU (input) DOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGELQF. + reflector H(i), as returned by DGELQF. - C (input/output) REAL array, dimension (LDC,N) + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -95147,7 +30119,7 @@ L110: *info = -3; } else if (*n < 0) { *info = -4; - } else if ((*k < 0) || (*k > nq)) { + } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,*k)) { *info = -7; @@ -95169,16 +30141,16 @@ L110: i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "SORMLQ", ch__1, m, n, k, &c_n1, ( + i__1 = 64, i__2 = ilaenv_(&c__1, "DORMLQ", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nb = min(i__1,i__2); lwkopt = max(1,nw) * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("SORMLQ", &i__1); + xerbla_("DORMLQ", &i__1); return 0; } else if (lquery) { return 0; @@ -95186,8 +30158,8 @@ L110: /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - work[1] = 1.f; + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; return 0; } @@ -95204,7 +30176,7 @@ L110: i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "SORMLQ", ch__1, m, n, k, &c_n1, ( + i__1 = 2, i__2 = 
ilaenv_(&c__2, "DORMLQ", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nbmin = max(i__1,i__2); } @@ -95212,17 +30184,17 @@ L110: iws = nw; } - if ((nb < nbmin) || (nb >= *k)) { + if (nb < nbmin || nb >= *k) { /* Use unblocked code */ - sorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { /* Use blocked code */ - if ((left && notran) || (! left && ! notran)) { + if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = nb; @@ -95259,7 +30231,7 @@ L110: */ i__4 = nq - i__ + 1; - slarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + dlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], t, &c__65); if (left) { @@ -95277,22 +30249,22 @@ L110: /* Apply H or H' */ - slarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + dlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], ldc, &work[1], &ldwork); /* L10: */ } } - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; return 0; -/* End of SORMLQ */ +/* End of DORMLQ */ -} /* sormlq_ */ +} /* dormlq_ */ -/* Subroutine */ int sormql_(char *side, char *trans, integer *m, integer *n, - integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *lwork, integer *info) +/* Subroutine */ int dormql_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; @@ -95305,20 +30277,20 @@ L110: /* Local variables */ static integer i__; - static real t[4160] /* was [65][64] */; + static doublereal t[4160] /* was [65][64] */; static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; static logical left; extern logical lsame_(char *, char *); static integer nbmin, iinfo; - 
extern /* Subroutine */ int sorm2l_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *), slarfb_(char *, char *, char *, char * - , integer *, integer *, integer *, real *, integer *, real *, - integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int dorm2l_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); static logical notran; static integer ldwork, lwkopt; static logical lquery; @@ -95334,7 +30306,7 @@ L110: Purpose ======= - SORMQL overwrites the general real M-by-N matrix C with + DORMQL overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q @@ -95345,7 +30317,7 @@ L110: Q = H(k) . . . H(2) H(1) - as returned by SGEQLF. Q is of order M if SIDE = 'L' and of order N + as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. Arguments @@ -95371,10 +30343,10 @@ L110: If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. - A (input) REAL array, dimension (LDA,K) + A (input) DOUBLE PRECISION array, dimension (LDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by - SGEQLF in the last k columns of its array argument A. + DGEQLF in the last k columns of its array argument A. 
A is modified by the routine but restored on exit. LDA (input) INTEGER @@ -95382,18 +30354,18 @@ L110: If SIDE = 'L', LDA >= max(1,M); if SIDE = 'R', LDA >= max(1,N). - TAU (input) REAL array, dimension (K) + TAU (input) DOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEQLF. + reflector H(i), as returned by DGEQLF. - C (input/output) REAL array, dimension (LDC,N) + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -95452,7 +30424,7 @@ L110: *info = -3; } else if (*n < 0) { *info = -4; - } else if ((*k < 0) || (*k > nq)) { + } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; @@ -95474,16 +30446,16 @@ L110: i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQL", ch__1, m, n, k, &c_n1, ( + i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQL", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nb = min(i__1,i__2); lwkopt = max(1,nw) * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("SORMQL", &i__1); + xerbla_("DORMQL", &i__1); return 0; } else if (lquery) { return 0; @@ -95491,8 +30463,8 @@ L110: /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - work[1] = 1.f; + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; return 0; } @@ -95509,7 +30481,7 @@ L110: i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQL", ch__1, m, n, 
k, &c_n1, ( + i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQL", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nbmin = max(i__1,i__2); } @@ -95517,17 +30489,17 @@ L110: iws = nw; } - if ((nb < nbmin) || (nb >= *k)) { + if (nb < nbmin || nb >= *k) { /* Use unblocked code */ - sorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + dorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { /* Use blocked code */ - if ((left && notran) || (! left && ! notran)) { + if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = nb; @@ -95556,7 +30528,7 @@ L110: */ i__4 = nq - *k + i__ + ib - 1; - slarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + dlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] , lda, &tau[i__], t, &c__65); if (left) { @@ -95572,22 +30544,22 @@ L110: /* Apply H or H' */ - slarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ + dlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & work[1], &ldwork); /* L10: */ } } - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; return 0; -/* End of SORMQL */ +/* End of DORMQL */ -} /* sormql_ */ +} /* dormql_ */ -/* Subroutine */ int sormqr_(char *side, char *trans, integer *m, integer *n, - integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *lwork, integer *info) +/* Subroutine */ int dormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; @@ -95600,20 +30572,20 @@ L110: /* Local variables */ static integer i__; - static real t[4160] /* was [65][64] */; + static doublereal t[4160] /* was [65][64] */; static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; static logical left; extern logical 
lsame_(char *, char *); static integer nbmin, iinfo; - extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *), slarfb_(char *, char *, char *, char * - , integer *, integer *, integer *, real *, integer *, real *, - integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int dorm2r_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, - real *, integer *, real *, real *, integer *); static logical notran; static integer ldwork, lwkopt; static logical lquery; @@ -95629,7 +30601,7 @@ L110: Purpose ======= - SORMQR overwrites the general real M-by-N matrix C with + DORMQR overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q @@ -95640,7 +30612,7 @@ L110: Q = H(1) H(2) . . . H(k) - as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N + as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. Arguments @@ -95666,10 +30638,10 @@ L110: If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. - A (input) REAL array, dimension (LDA,K) + A (input) DOUBLE PRECISION array, dimension (LDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by - SGEQRF in the first k columns of its array argument A. 
+ DGEQRF in the first k columns of its array argument A. A is modified by the routine but restored on exit. LDA (input) INTEGER @@ -95677,18 +30649,18 @@ L110: If SIDE = 'L', LDA >= max(1,M); if SIDE = 'R', LDA >= max(1,N). - TAU (input) REAL array, dimension (K) + TAU (input) DOUBLE PRECISION array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SGEQRF. + reflector H(i), as returned by DGEQRF. - C (input/output) REAL array, dimension (LDC,N) + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -95747,7 +30719,7 @@ L110: *info = -3; } else if (*n < 0) { *info = -4; - } else if ((*k < 0) || (*k > nq)) { + } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; @@ -95769,16 +30741,16 @@ L110: i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQR", ch__1, m, n, k, &c_n1, ( + i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQR", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nb = min(i__1,i__2); lwkopt = max(1,nw) * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("SORMQR", &i__1); + xerbla_("DORMQR", &i__1); return 0; } else if (lquery) { return 0; @@ -95786,8 +30758,8 @@ L110: /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (*k == 0)) { - work[1] = 1.f; + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; return 0; } @@ -95804,7 +30776,7 @@ L110: i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - 
i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQR", ch__1, m, n, k, &c_n1, ( + i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQR", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nbmin = max(i__1,i__2); } @@ -95812,17 +30784,17 @@ L110: iws = nw; } - if ((nb < nbmin) || (nb >= *k)) { + if (nb < nbmin || nb >= *k) { /* Use unblocked code */ - sorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { /* Use blocked code */ - if ((left && ! notran) || (! left && notran)) { + if (left && ! notran || ! left && notran) { i1 = 1; i2 = *k; i3 = nb; @@ -95853,7 +30825,7 @@ L110: */ i__4 = nq - i__ + 1; - slarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], t, &c__65) ; if (left) { @@ -95872,22 +30844,22 @@ L110: /* Apply H or H' */ - slarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + dlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], ldc, &work[1], &ldwork); /* L10: */ } } - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; return 0; -/* End of SORMQR */ +/* End of DORMQR */ -} /* sormqr_ */ +} /* dormqr_ */ -/* Subroutine */ int sormtr_(char *side, char *uplo, char *trans, integer *m, - integer *n, real *a, integer *lda, real *tau, real *c__, integer *ldc, - real *work, integer *lwork, integer *info) +/* Subroutine */ int dormtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; @@ -95906,14 +30878,14 @@ L110: extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int 
sormql_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *, integer *); + extern /* Subroutine */ int dormql_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), + dormqr_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); static integer lwkopt; static logical lquery; - extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *, integer *); /* @@ -95926,7 +30898,7 @@ L110: Purpose ======= - SORMTR overwrites the general real M-by-N matrix C with + DORMTR overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q @@ -95934,7 +30906,7 @@ L110: where Q is a real orthogonal matrix of order nq, with nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of - nq-1 elementary reflectors, as returned by SSYTRD: + nq-1 elementary reflectors, as returned by DSYTRD: if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); @@ -95949,9 +30921,9 @@ L110: UPLO (input) CHARACTER*1 = 'U': Upper triangle of A contains elementary reflectors - from SSYTRD; + from DSYTRD; = 'L': Lower triangle of A contains elementary reflectors - from SSYTRD. + from DSYTRD. TRANS (input) CHARACTER*1 = 'N': No transpose, apply Q; @@ -95963,30 +30935,30 @@ L110: N (input) INTEGER The number of columns of the matrix C. N >= 0. - A (input) REAL array, dimension + A (input) DOUBLE PRECISION array, dimension (LDA,M) if SIDE = 'L' (LDA,N) if SIDE = 'R' The vectors which define the elementary reflectors, as - returned by SSYTRD. + returned by DSYTRD. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. 
- TAU (input) REAL array, dimension + TAU (input) DOUBLE PRECISION array, dimension (M-1) if SIDE = 'L' (N-1) if SIDE = 'R' TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by SSYTRD. + reflector H(i), as returned by DSYTRD. - C (input/output) REAL array, dimension (LDC,N) + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -96065,7 +31037,7 @@ L110: s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); i__2 = *m - 1; i__3 = *m - 1; - nb = ilaenv_(&c__1, "SORMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + nb = ilaenv_(&c__1, "DORMQL", ch__1, &i__2, n, &i__3, &c_n1, ( ftnlen)6, (ftnlen)2); } else { /* Writing concatenation */ @@ -96074,7 +31046,7 @@ L110: s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); i__2 = *n - 1; i__3 = *n - 1; - nb = ilaenv_(&c__1, "SORMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + nb = ilaenv_(&c__1, "DORMQL", ch__1, m, &i__2, &i__3, &c_n1, ( ftnlen)6, (ftnlen)2); } } else { @@ -96085,7 +31057,7 @@ L110: s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); i__2 = *m - 1; i__3 = *m - 1; - nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__2, n, &i__3, &c_n1, ( ftnlen)6, (ftnlen)2); } else { /* Writing concatenation */ @@ -96094,17 +31066,17 @@ L110: s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); i__2 = *n - 1; i__3 = *n - 1; - nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__2, &i__3, &c_n1, ( ftnlen)6, (ftnlen)2); } } lwkopt = max(1,nw) * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; } if (*info != 0) { i__2 = -(*info); - xerbla_("SORMTR", 
&i__2); + xerbla_("DORMTR", &i__2); return 0; } else if (lquery) { return 0; @@ -96112,8 +31084,8 @@ L110: /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (nq == 1)) { - work[1] = 1.f; + if (*m == 0 || *n == 0 || nq == 1) { + work[1] = 1.; return 0; } @@ -96127,14 +31099,14 @@ L110: if (upper) { -/* Q was determined by a call to SSYTRD with UPLO = 'U' */ +/* Q was determined by a call to DSYTRD with UPLO = 'U' */ i__2 = nq - 1; - sormql_(side, trans, &mi, &ni, &i__2, &a[((a_dim1) << (1)) + 1], lda, - &tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); + dormql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); } else { -/* Q was determined by a call to SSYTRD with UPLO = 'L' */ +/* Q was determined by a call to DSYTRD with UPLO = 'L' */ if (left) { i1 = 2; @@ -96144,34 +31116,37 @@ L110: i2 = 2; } i__2 = nq - 1; - sormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + dormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); } - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; return 0; -/* End of SORMTR */ +/* End of DORMTR */ -} /* sormtr_ */ +} /* dormtr_ */ -/* Subroutine */ int spotf2_(char *uplo, integer *n, real *a, integer *lda, - integer *info) +/* Subroutine */ int dpotf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1; + doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer j; - static real ajj; - extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + static doublereal ajj; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); extern logical lsame_(char *, char *); - extern 
/* Subroutine */ int sscal_(integer *, real *, real *, integer *), - sgemv_(char *, integer *, integer *, real *, real *, integer *, - real *, integer *, real *, real *, integer *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); static logical upper; extern /* Subroutine */ int xerbla_(char *, integer *); @@ -96186,7 +31161,7 @@ L110: Purpose ======= - SPOTF2 computes the Cholesky factorization of a real symmetric + DPOTF2 computes the Cholesky factorization of a real symmetric positive definite matrix A. The factorization has the form @@ -96208,7 +31183,7 @@ L110: N (input) INTEGER The order of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading n by n upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower @@ -96253,7 +31228,7 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("SPOTF2", &i__1); + xerbla_("DPOTF2", &i__1); return 0; } @@ -96273,9 +31248,9 @@ L110: /* Compute U(J,J) and test for non-positive-definiteness. */ i__2 = j - 1; - ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j * a_dim1 + 1], &c__1, + ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1], &c__1); - if (ajj <= 0.f) { + if (ajj <= 0.) { a[j + j * a_dim1] = ajj; goto L30; } @@ -96287,12 +31262,12 @@ L110: if (j < *n) { i__2 = j - 1; i__3 = *n - j; - sgemv_("Transpose", &i__2, &i__3, &c_b1290, &a[(j + 1) * - a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b1011, - &a[j + (j + 1) * a_dim1], lda); + dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(j + 1) * + a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b15, & + a[j + (j + 1) * a_dim1], lda); i__2 = *n - j; - r__1 = 1.f / ajj; - sscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda); + d__1 = 1. 
/ ajj; + dscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); } /* L10: */ } @@ -96306,9 +31281,9 @@ L110: /* Compute L(J,J) and test for non-positive-definiteness. */ i__2 = j - 1; - ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j + a_dim1], lda, &a[j + ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j + a_dim1], lda, &a[j + a_dim1], lda); - if (ajj <= 0.f) { + if (ajj <= 0.) { a[j + j * a_dim1] = ajj; goto L30; } @@ -96320,12 +31295,12 @@ L110: if (j < *n) { i__2 = *n - j; i__3 = j - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[j + 1 + - a_dim1], lda, &a[j + a_dim1], lda, &c_b1011, &a[j + 1 - + j * a_dim1], &c__1); + dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[j + 1 + + a_dim1], lda, &a[j + a_dim1], lda, &c_b15, &a[j + 1 + + j * a_dim1], &c__1); i__2 = *n - j; - r__1 = 1.f / ajj; - sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + d__1 = 1. / ajj; + dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); } /* L20: */ } @@ -96338,29 +31313,31 @@ L30: L40: return 0; -/* End of SPOTF2 */ +/* End of DPOTF2 */ -} /* spotf2_ */ +} /* dpotf2_ */ -/* Subroutine */ int spotrf_(char *uplo, integer *n, real *a, integer *lda, - integer *info) +/* Subroutine */ int dpotrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer j, jb, nb; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); static logical upper; - extern /* Subroutine */ int 
strsm_(char *, char *, char *, char *, - integer *, integer *, real *, real *, integer *, real *, integer * - ), ssyrk_(char *, char *, integer - *, integer *, real *, real *, integer *, real *, real *, integer * - ), spotf2_(char *, integer *, real *, integer *, - integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), dpotf2_(char *, integer *, + doublereal *, integer *, integer *), xerbla_(char *, + integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); @@ -96375,7 +31352,7 @@ L40: Purpose ======= - SPOTRF computes the Cholesky factorization of a real symmetric + DPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix A. The factorization has the form @@ -96395,7 +31372,7 @@ L40: N (input) INTEGER The order of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower @@ -96440,7 +31417,7 @@ L40: } if (*info != 0) { i__1 = -(*info); - xerbla_("SPOTRF", &i__1); + xerbla_("DPOTRF", &i__1); return 0; } @@ -96452,13 +31429,13 @@ L40: /* Determine the block size for this environment. */ - nb = ilaenv_(&c__1, "SPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + nb = ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); - if ((nb <= 1) || (nb >= *n)) { + if (nb <= 1 || nb >= *n) { /* Use unblocked code. */ - spotf2_(uplo, n, &a[a_offset], lda, info); + dpotf2_(uplo, n, &a[a_offset], lda, info); } else { /* Use blocked code. 
*/ @@ -96480,9 +31457,9 @@ L40: i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); i__3 = j - 1; - ssyrk_("Upper", "Transpose", &jb, &i__3, &c_b1290, &a[j * - a_dim1 + 1], lda, &c_b1011, &a[j + j * a_dim1], lda); - spotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b151, &a[j * + a_dim1 + 1], lda, &c_b15, &a[j + j * a_dim1], lda); + dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { goto L30; } @@ -96492,14 +31469,14 @@ L40: i__3 = *n - j - jb + 1; i__4 = j - 1; - sgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & - c_b1290, &a[j * a_dim1 + 1], lda, &a[(j + jb) * - a_dim1 + 1], lda, &c_b1011, &a[j + (j + jb) * + dgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & + c_b151, &a[j * a_dim1 + 1], lda, &a[(j + jb) * + a_dim1 + 1], lda, &c_b15, &a[j + (j + jb) * a_dim1], lda); i__3 = *n - j - jb + 1; - strsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & - i__3, &c_b1011, &a[j + j * a_dim1], lda, &a[j + ( - j + jb) * a_dim1], lda); + dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & + i__3, &c_b15, &a[j + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda); } /* L10: */ } @@ -96521,9 +31498,9 @@ L40: i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); i__3 = j - 1; - ssyrk_("Lower", "No transpose", &jb, &i__3, &c_b1290, &a[j + - a_dim1], lda, &c_b1011, &a[j + j * a_dim1], lda); - spotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + dsyrk_("Lower", "No transpose", &jb, &i__3, &c_b151, &a[j + + a_dim1], lda, &c_b15, &a[j + j * a_dim1], lda); + dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { goto L30; } @@ -96533,13 +31510,13 @@ L40: i__3 = *n - j - jb + 1; i__4 = j - 1; - sgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & - c_b1290, &a[j + jb + a_dim1], lda, &a[j + a_dim1], - lda, &c_b1011, &a[j + jb + j * a_dim1], lda); + dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & + c_b151, &a[j + jb + a_dim1], lda, &a[j + a_dim1], + lda, &c_b15, &a[j + 
jb + j * a_dim1], lda); i__3 = *n - j - jb + 1; - strsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & - jb, &c_b1011, &a[j + j * a_dim1], lda, &a[j + jb - + j * a_dim1], lda); + dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & + jb, &c_b15, &a[j + j * a_dim1], lda, &a[j + jb + + j * a_dim1], lda); } /* L20: */ } @@ -96553,257 +31530,17 @@ L30: L40: return 0; -/* End of SPOTRF */ - -} /* spotrf_ */ - -/* Subroutine */ int spotri_(char *uplo, integer *n, real *a, integer *lda, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *), slauum_( - char *, integer *, real *, integer *, integer *), strtri_( - char *, char *, integer *, real *, integer *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - SPOTRI computes the inverse of a real symmetric positive definite - matrix A using the Cholesky factorization A = U**T*U or A = L*L**T - computed by SPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - A (input/output) REAL array, dimension (LDA,N) - On entry, the triangular factor U or L from the Cholesky - factorization A = U**T*U or A = L*L**T, as computed by - SPOTRF. - On exit, the upper or lower triangle of the (symmetric) - inverse of A, overwriting the input factor U or L. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the (i,i) element of the factor U or L is - zero, and the inverse could not be computed. 
- - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SPOTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Invert the triangular Cholesky factor U or L. */ - - strtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); - if (*info > 0) { - return 0; - } - -/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ - - slauum_(uplo, n, &a[a_offset], lda, info); - - return 0; - -/* End of SPOTRI */ - -} /* spotri_ */ - -/* Subroutine */ int spotrs_(char *uplo, integer *n, integer *nrhs, real *a, - integer *lda, real *b, integer *ldb, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - static logical upper; - extern /* Subroutine */ int strsm_(char *, char *, char *, char *, - integer *, integer *, real *, real *, integer *, real *, integer * - ), xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - - - Purpose - ======= - - SPOTRS solves a system of linear equations A*X = B with a symmetric - positive definite matrix A using the Cholesky factorization - A = U**T*U or A = L*L**T computed by SPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. 
- - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) REAL array, dimension (LDA,N) - The triangular factor U or L from the Cholesky factorization - A = U**T*U or A = L*L**T, as computed by SPOTRF. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - B (input/output) REAL array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("SPOTRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if ((*n == 0) || (*nrhs == 0)) { - return 0; - } - - if (upper) { - -/* - Solve A*X = B where A = U'*U. - - Solve U'*X = B, overwriting B with X. -*/ - - strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b1011, & - a[a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. */ - - strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b1011, - &a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A*X = B where A = L*L'. - - Solve L*X = B, overwriting B with X. 
-*/ - - strsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b1011, - &a[a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. */ - - strsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b1011, & - a[a_offset], lda, &b[b_offset], ldb); - } - - return 0; - -/* End of SPOTRS */ +/* End of DPOTRF */ -} /* spotrs_ */ +} /* dpotrf_ */ -/* Subroutine */ int sstedc_(char *compz, integer *n, real *d__, real *e, - real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, - integer *liwork, integer *info) +/* Subroutine */ int dstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; - real r__1, r__2; + doublereal d__1, d__2; /* Builtin functions */ double log(doublereal); @@ -96812,36 +31549,39 @@ L40: /* Local variables */ static integer i__, j, k, m; - static real p; + static doublereal p; static integer ii, end, lgn; - static real eps, tiny; + static doublereal eps, tiny; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, - integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *); - static integer lwmin, start; - extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, - integer *), slaed0_(integer *, integer *, integer *, real *, real - *, real *, integer *, real *, integer *, real *, integer *, - integer *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer lwmin; + 
extern /* Subroutine */ int dlaed0_(integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *); + static integer start; + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, - real *, integer *), slaset_(char *, integer *, integer *, - real *, real *, real *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), dlasrt_(char *, integer *, doublereal *, integer *); static integer liwmin, icompz; - static real orgnrm; - extern doublereal slanst_(char *, integer *, real *, real *); - extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *), - slasrt_(char *, integer *, real *, integer *); + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + static doublereal orgnrm; static logical lquery; - static integer smlsiz; - extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, - real *, integer *, real *, integer *); - static integer storez, strtrw; + static integer smlsiz, dtrtrw, storez; /* @@ -96854,10 +31594,10 @@ L40: Purpose ======= - SSTEDC computes all eigenvalues and, optionally, 
eigenvectors of a + DSTEDC computes all eigenvalues and, optionally, eigenvectors of a symmetric tridiagonal matrix using the divide and conquer method. The eigenvectors of a full or band real symmetric matrix can also be - found if SSYTRD or SSPTRD or SSBTRD has been used to reduce this + found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to tridiagonal form. This code makes very mild assumptions about floating point @@ -96865,7 +31605,7 @@ L40: add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See SLAED3 for details. + without guard digits, but we know of none. See DLAED3 for details. Arguments ========= @@ -96881,15 +31621,15 @@ L40: N (input) INTEGER The dimension of the symmetric tridiagonal matrix. N >= 0. - D (input/output) REAL array, dimension (N) + D (input/output) DOUBLE PRECISION array, dimension (N) On entry, the diagonal elements of the tridiagonal matrix. On exit, if INFO = 0, the eigenvalues in ascending order. - E (input/output) REAL array, dimension (N-1) + E (input/output) DOUBLE PRECISION array, dimension (N-1) On entry, the subdiagonal elements of the tridiagonal matrix. On exit, E has been destroyed. - Z (input/output) REAL array, dimension (LDZ,N) + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) On entry, if COMPZ = 'V', then Z contains the orthogonal matrix used in the reduction to tridiagonal form. On exit, if INFO = 0, then if COMPZ = 'V', Z contains the @@ -96902,7 +31642,7 @@ L40: The leading dimension of the array Z. LDZ >= 1. If eigenvectors are desired, then LDZ >= max(1,N). - WORK (workspace/output) REAL array, + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
@@ -96969,7 +31709,7 @@ L40: /* Function Body */ *info = 0; - lquery = (*lwork == -1) || (*liwork == -1); + lquery = *lwork == -1 || *liwork == -1; if (lsame_(compz, "N")) { icompz = 0; @@ -96980,11 +31720,11 @@ L40: } else { icompz = -1; } - if ((*n <= 1) || (icompz <= 0)) { + if (*n <= 1 || icompz <= 0) { liwmin = 1; lwmin = 1; } else { - lgn = (integer) (log((real) (*n)) / log(2.f)); + lgn = (integer) (log((doublereal) (*n)) / log(2.)); if (pow_ii(&c__2, &lgn) < *n) { ++lgn; } @@ -96994,12 +31734,12 @@ L40: if (icompz == 1) { /* Computing 2nd power */ i__1 = *n; - lwmin = *n * 3 + 1 + ((*n) << (1)) * lgn + i__1 * i__1 * 3; + lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; liwmin = *n * 6 + 6 + *n * 5 * lgn; } else if (icompz == 2) { /* Computing 2nd power */ i__1 = *n; - lwmin = ((*n) << (2)) + 1 + i__1 * i__1; + lwmin = (*n << 2) + 1 + i__1 * i__1; liwmin = *n * 5 + 3; } } @@ -97007,7 +31747,7 @@ L40: *info = -1; } else if (*n < 0) { *info = -2; - } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) { + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { *info = -6; } else if (*lwork < lwmin && ! lquery) { *info = -8; @@ -97016,13 +31756,13 @@ L40: } if (*info == 0) { - work[1] = (real) lwmin; + work[1] = (doublereal) lwmin; iwork[1] = liwmin; } if (*info != 0) { i__1 = -(*info); - xerbla_("SSTEDC", &i__1); + xerbla_("DSTEDC", &i__1); return 0; } else if (lquery) { return 0; @@ -97035,12 +31775,12 @@ L40: } if (*n == 1) { if (icompz != 0) { - z__[z_dim1 + 1] = 1.f; + z__[z_dim1 + 1] = 1.; } return 0; } - smlsiz = ilaenv_(&c__9, "SSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + smlsiz = ilaenv_(&c__9, "DSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( ftnlen)6, (ftnlen)1); /* @@ -97048,15 +31788,15 @@ L40: will use the Divide and Conquer routine to compute only the eigenvalues, which requires (3N + 3N**2) real workspace and (2 + 5N + 2N lg(N)) integer workspace. 
- Since on many architectures SSTERF is much faster than any other + Since on many architectures DSTERF is much faster than any other algorithm for finding eigenvalues only, it is used here as the default. - If COMPZ = 'N', use SSTERF to compute the eigenvalues. + If COMPZ = 'N', use DSTERF to compute the eigenvalues. */ if (icompz == 0) { - ssterf_(n, &d__[1], &e[1], info); + dsterf_(n, &d__[1], &e[1], info); return 0; } @@ -97067,14 +31807,14 @@ L40: if (*n <= smlsiz) { if (icompz == 0) { - ssterf_(n, &d__[1], &e[1], info); + dsterf_(n, &d__[1], &e[1], info); return 0; } else if (icompz == 2) { - ssteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], + dsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); return 0; } else { - ssteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], + dsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); return 0; } @@ -97092,17 +31832,17 @@ L40: } if (icompz == 2) { - slaset_("Full", n, n, &c_b320, &c_b1011, &z__[z_offset], ldz); + dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); } /* Scale. */ - orgnrm = slanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.f) { + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { return 0; } - eps = slamch_("Epsilon"); + eps = EPSILON; start = 1; @@ -97121,9 +31861,9 @@ L10: end = start; L20: if (end < *n) { - tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 = - d__[end + 1], dabs(r__2))); - if ((r__1 = e[end], dabs(r__1)) > tiny) { + tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = + d__[end + 1], abs(d__2))); + if ((d__1 = e[end], abs(d__1)) > tiny) { ++end; goto L20; } @@ -97141,20 +31881,20 @@ L20: /* Scale. 
*/ - orgnrm = slanst_("M", &m, &d__[start], &e[start]); - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &m, &c__1, &d__[ - start], &m, info); + orgnrm = dlanst_("M", &m, &d__[start], &e[start]); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &m, &c__1, &d__[start] + , &m, info); i__1 = m - 1; i__2 = m - 1; - slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &i__1, &c__1, &e[ + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &i__1, &c__1, &e[ start], &i__2, info); if (icompz == 1) { - strtrw = 1; + dtrtrw = 1; } else { - strtrw = start; + dtrtrw = start; } - slaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw + + dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[dtrtrw + start * z_dim1], ldz, &work[1], n, &work[storez], &iwork[ 1], info); if (*info != 0) { @@ -97165,8 +31905,8 @@ L20: /* Scale back. */ - slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, &m, &c__1, &d__[ - start], &m, info); + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &m, &c__1, &d__[start] + , &m, info); } else { if (icompz == 1) { @@ -97177,17 +31917,17 @@ L20: then multiply back into Z. 
*/ - ssteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ + dsteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ m * m + 1], info); - slacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ + dlacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ storez], n); - sgemm_("N", "N", n, &m, &m, &c_b1011, &work[storez], ldz, & - work[1], &m, &c_b320, &z__[start * z_dim1 + 1], ldz); + dgemm_("N", "N", n, &m, &m, &c_b15, &work[storez], ldz, &work[ + 1], &m, &c_b29, &z__[start * z_dim1 + 1], ldz); } else if (icompz == 2) { - ssteqr_("I", &m, &d__[start], &e[start], &z__[start + start * + dsteqr_("I", &m, &d__[start], &e[start], &z__[start + start * z_dim1], ldz, &work[1], info); } else { - ssterf_(&m, &d__[start], &e[start], info); + dsterf_(&m, &d__[start], &e[start], info); } if (*info != 0) { *info = start * (*n + 1) + end; @@ -97212,7 +31952,7 @@ L20: /* Use Quick Sort */ - slasrt_("I", n, &d__[1], info); + dlasrt_("I", n, &d__[1], info); } else { @@ -97234,7 +31974,7 @@ L20: if (k != i__) { d__[k] = d__[i__]; d__[i__] = p; - sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], &c__1); } /* L40: */ @@ -97242,60 +31982,64 @@ L20: } } - work[1] = (real) lwmin; + work[1] = (doublereal) lwmin; iwork[1] = liwmin; return 0; -/* End of SSTEDC */ +/* End of DSTEDC */ -} /* sstedc_ */ +} /* dstedc_ */ -/* Subroutine */ int ssteqr_(char *compz, integer *n, real *d__, real *e, - real *z__, integer *ldz, real *work, integer *info) +/* Subroutine */ int dsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; - real r__1, r__2; + doublereal d__1, d__2; /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); + double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ - static real b, c__, f, g; + static 
doublereal b, c__, f, g; static integer i__, j, k, l, m; - static real p, r__, s; + static doublereal p, r__, s; static integer l1, ii, mm, lm1, mm1, nm1; - static real rt1, rt2, eps; + static doublereal rt1, rt2, eps; static integer lsv; - static real tst, eps2; + static doublereal tst, eps2; static integer lend, jtot; - extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) - ; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); extern logical lsame_(char *, char *); - static real anorm; - extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, - integer *, real *, real *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer *); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *); + static doublereal anorm; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *), dlaev2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); static integer lendm1, lendp1; - extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real * - , real *, real *); - extern doublereal slapy2_(real *, real *); + static integer iscale; - extern doublereal slamch_(char *); - static real safmin; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *); + static doublereal safmin; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + static doublereal safmax; extern /* Subroutine */ int xerbla_(char *, integer *); - static real safmax; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real 
*, integer *, integer *); - static integer lendsv; - extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * - ), slaset_(char *, integer *, integer *, real *, real *, real *, + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, integer *); - static real ssfmin; + static integer lendsv; + static doublereal ssfmin; static integer nmaxit, icompz; - static real ssfmax; - extern doublereal slanst_(char *, integer *, real *, real *); - extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); + static doublereal ssfmax; /* @@ -97308,10 +32052,10 @@ L20: Purpose ======= - SSTEQR computes all eigenvalues and, optionally, eigenvectors of a + DSTEQR computes all eigenvalues and, optionally, eigenvectors of a symmetric tridiagonal matrix using the implicit QL or QR method. The eigenvectors of a full or band symmetric matrix can also be found - if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to + if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to tridiagonal form. Arguments @@ -97330,16 +32074,16 @@ L20: N (input) INTEGER The order of the matrix. N >= 0. - D (input/output) REAL array, dimension (N) + D (input/output) DOUBLE PRECISION array, dimension (N) On entry, the diagonal elements of the tridiagonal matrix. On exit, if INFO = 0, the eigenvalues in ascending order. - E (input/output) REAL array, dimension (N-1) + E (input/output) DOUBLE PRECISION array, dimension (N-1) On entry, the (n-1) subdiagonal elements of the tridiagonal matrix. On exit, E has been destroyed. - Z (input/output) REAL array, dimension (LDZ, N) + Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) On entry, if COMPZ = 'V', then Z contains the orthogonal matrix used in the reduction to tridiagonal form. On exit, if INFO = 0, then if COMPZ = 'V', Z contains the @@ -97352,7 +32096,7 @@ L20: The leading dimension of the array Z. 
LDZ >= 1, and if eigenvectors are desired, then LDZ >= max(1,N). - WORK (workspace) REAL array, dimension (max(1,2*N-2)) + WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) If COMPZ = 'N', then WORK is not referenced. INFO (output) INTEGER @@ -97395,12 +32139,12 @@ L20: *info = -1; } else if (*n < 0) { *info = -2; - } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) { + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); - xerbla_("SSTEQR", &i__1); + xerbla_("DSTEQR", &i__1); return 0; } @@ -97412,20 +32156,20 @@ L20: if (*n == 1) { if (icompz == 2) { - z__[z_dim1 + 1] = 1.f; + z__[z_dim1 + 1] = 1.; } return 0; } /* Determine the unit roundoff and over/underflow thresholds. */ - eps = slamch_("E"); + eps = EPSILON; /* Computing 2nd power */ - r__1 = eps; - eps2 = r__1 * r__1; - safmin = slamch_("S"); - safmax = 1.f / safmin; - ssfmax = sqrt(safmax) / 3.f; + d__1 = eps; + eps2 = d__1 * d__1; + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; ssfmin = sqrt(safmin) / eps2; /* @@ -97434,7 +32178,7 @@ L20: */ if (icompz == 2) { - slaset_("Full", n, n, &c_b320, &c_b1011, &z__[z_offset], ldz); + dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); } nmaxit = *n * 30; @@ -97454,18 +32198,18 @@ L10: goto L160; } if (l1 > 1) { - e[l1 - 1] = 0.f; + e[l1 - 1] = 0.; } if (l1 <= nm1) { i__1 = nm1; for (m = l1; m <= i__1; ++m) { - tst = (r__1 = e[m], dabs(r__1)); - if (tst == 0.f) { + tst = (d__1 = e[m], abs(d__1)); + if (tst == 0.) 
{ goto L30; } - if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m - + 1], dabs(r__2))) * eps) { - e[m] = 0.f; + if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m + + 1], abs(d__2))) * eps) { + e[m] = 0.; goto L30; } /* L20: */ @@ -97486,32 +32230,32 @@ L30: /* Scale submatrix in rows and columns L to LEND */ i__1 = lend - l + 1; - anorm = slanst_("I", &i__1, &d__[l], &e[l]); + anorm = dlanst_("I", &i__1, &d__[l], &e[l]); iscale = 0; - if (anorm == 0.f) { + if (anorm == 0.) { goto L10; } if (anorm > ssfmax) { iscale = 1; i__1 = lend - l + 1; - slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, info); i__1 = lend - l; - slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, info); } else if (anorm < ssfmin) { iscale = 2; i__1 = lend - l + 1; - slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, info); i__1 = lend - l; - slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, info); } /* Choose between QL and QR iteration */ - if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { lend = lsv; l = lendsv; } @@ -97530,10 +32274,10 @@ L40: i__1 = lendm1; for (m = l; m <= i__1; ++m) { /* Computing 2nd power */ - r__2 = (r__1 = e[m], dabs(r__1)); - tst = r__2 * r__2; - if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m - + 1], dabs(r__2)) + safmin) { + d__2 = (d__1 = e[m], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + + 1], abs(d__2)) + safmin) { goto L60; } /* L50: */ @@ -97544,7 +32288,7 @@ L40: L60: if (m < lend) { - e[m] = 0.f; + e[m] = 0.; } p = d__[l]; if 
(m == l) { @@ -97552,23 +32296,23 @@ L60: } /* - If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 to compute its eigensystem. */ if (m == l + 1) { if (icompz > 0) { - slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); work[l] = c__; work[*n - 1 + l] = s; - slasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + dlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & z__[l * z_dim1 + 1], ldz); } else { - slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); } d__[l] = rt1; d__[l + 1] = rt2; - e[l] = 0.f; + e[l] = 0.; l += 2; if (l <= lend) { goto L40; @@ -97583,13 +32327,13 @@ L60: /* Form shift. */ - g = (d__[l + 1] - p) / (e[l] * 2.f); - r__ = slapy2_(&g, &c_b1011); - g = d__[m] - p + e[l] / (g + r_sign(&r__, &g)); + g = (d__[l + 1] - p) / (e[l] * 2.); + r__ = dlapy2_(&g, &c_b15); + g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); - s = 1.f; - c__ = 1.f; - p = 0.f; + s = 1.; + c__ = 1.; + p = 0.; /* Inner loop */ @@ -97598,12 +32342,12 @@ L60: for (i__ = mm1; i__ >= i__1; --i__) { f = s * e[i__]; b = c__ * e[i__]; - slartg_(&g, &f, &c__, &s, &r__); + dlartg_(&g, &f, &c__, &s, &r__); if (i__ != m - 1) { e[i__ + 1] = r__; } g = d__[i__ + 1] - p; - r__ = (d__[i__] - g) * s + c__ * 2.f * b; + r__ = (d__[i__] - g) * s + c__ * 2. 
* b; p = s * r__; d__[i__ + 1] = g + p; g = c__ * r__ - b; @@ -97622,7 +32366,7 @@ L60: if (icompz > 0) { mm = m - l + 1; - slasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + dlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l * z_dim1 + 1], ldz); } @@ -97655,10 +32399,10 @@ L90: i__1 = lendp1; for (m = l; m >= i__1; --m) { /* Computing 2nd power */ - r__2 = (r__1 = e[m - 1], dabs(r__1)); - tst = r__2 * r__2; - if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m - - 1], dabs(r__2)) + safmin) { + d__2 = (d__1 = e[m - 1], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + - 1], abs(d__2)) + safmin) { goto L110; } /* L100: */ @@ -97669,7 +32413,7 @@ L90: L110: if (m > lend) { - e[m - 1] = 0.f; + e[m - 1] = 0.; } p = d__[l]; if (m == l) { @@ -97677,24 +32421,24 @@ L110: } /* - If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 to compute its eigensystem. */ if (m == l - 1) { if (icompz > 0) { - slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) ; work[m] = c__; work[*n - 1 + m] = s; - slasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + dlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & z__[(l - 1) * z_dim1 + 1], ldz); } else { - slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); } d__[l - 1] = rt1; d__[l] = rt2; - e[l - 1] = 0.f; + e[l - 1] = 0.; l += -2; if (l >= lend) { goto L90; @@ -97709,13 +32453,13 @@ L110: /* Form shift. 
*/ - g = (d__[l - 1] - p) / (e[l - 1] * 2.f); - r__ = slapy2_(&g, &c_b1011); - g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g)); + g = (d__[l - 1] - p) / (e[l - 1] * 2.); + r__ = dlapy2_(&g, &c_b15); + g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); - s = 1.f; - c__ = 1.f; - p = 0.f; + s = 1.; + c__ = 1.; + p = 0.; /* Inner loop */ @@ -97724,12 +32468,12 @@ L110: for (i__ = m; i__ <= i__1; ++i__) { f = s * e[i__]; b = c__ * e[i__]; - slartg_(&g, &f, &c__, &s, &r__); + dlartg_(&g, &f, &c__, &s, &r__); if (i__ != m) { e[i__ - 1] = r__; } g = d__[i__] - p; - r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b; + r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; p = s * r__; d__[i__] = g + p; g = c__ * r__ - b; @@ -97748,7 +32492,7 @@ L110: if (icompz > 0) { mm = l - m + 1; - slasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + dlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m * z_dim1 + 1], ldz); } @@ -97774,17 +32518,17 @@ L130: L140: if (iscale == 1) { i__1 = lendsv - lsv + 1; - slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], n, info); i__1 = lendsv - lsv; - slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, info); } else if (iscale == 2) { i__1 = lendsv - lsv + 1; - slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], n, info); i__1 = lendsv - lsv; - slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, info); } @@ -97798,7 +32542,7 @@ L140: } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.f) { + if (e[i__] != 0.) 
{ ++(*info); } /* L150: */ @@ -97812,7 +32556,7 @@ L160: /* Use Quick Sort */ - slasrt_("I", n, &d__[1], info); + dlasrt_("I", n, &d__[1], info); } else { @@ -97834,7 +32578,7 @@ L160: if (k != i__) { d__[k] = d__[i__]; d__[i__] = p; - sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], &c__1); } /* L180: */ @@ -97844,46 +32588,47 @@ L160: L190: return 0; -/* End of SSTEQR */ +/* End of DSTEQR */ -} /* ssteqr_ */ +} /* dsteqr_ */ -/* Subroutine */ int ssterf_(integer *n, real *d__, real *e, integer *info) +/* Subroutine */ int dsterf_(integer *n, doublereal *d__, doublereal *e, + integer *info) { /* System generated locals */ integer i__1; - real r__1, r__2, r__3; + doublereal d__1, d__2, d__3; /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); + double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ - static real c__; + static doublereal c__; static integer i__, l, m; - static real p, r__, s; + static doublereal p, r__, s; static integer l1; - static real bb, rt1, rt2, eps, rte; + static doublereal bb, rt1, rt2, eps, rte; static integer lsv; - static real eps2, oldc; + static doublereal eps2, oldc; static integer lend, jtot; - extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) - ; - static real gamma, alpha, sigma, anorm; - extern doublereal slapy2_(real *, real *); + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + static doublereal gamma, alpha, sigma, anorm; + static integer iscale; - static real oldgam; - extern doublereal slamch_(char *); - static real safmin; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + static doublereal oldgam, safmin; extern /* Subroutine */ int xerbla_(char *, integer *); - static real safmax; - extern /* Subroutine 
*/ int slascl_(char *, integer *, integer *, real *, - real *, integer *, integer *, real *, integer *, integer *); + static doublereal safmax; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); static integer lendsv; - static real ssfmin; + static doublereal ssfmin; static integer nmaxit; - static real ssfmax; - extern doublereal slanst_(char *, integer *, real *, real *); - extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); + static doublereal ssfmax; /* @@ -97896,7 +32641,7 @@ L190: Purpose ======= - SSTERF computes all eigenvalues of a symmetric tridiagonal matrix + DSTERF computes all eigenvalues of a symmetric tridiagonal matrix using the Pal-Walker-Kahan variant of the QL or QR algorithm. Arguments @@ -97905,11 +32650,11 @@ L190: N (input) INTEGER The order of the matrix. N >= 0. - D (input/output) REAL array, dimension (N) + D (input/output) DOUBLE PRECISION array, dimension (N) On entry, the n diagonal elements of the tridiagonal matrix. On exit, if INFO = 0, the eigenvalues in ascending order. - E (input/output) REAL array, dimension (N-1) + E (input/output) DOUBLE PRECISION array, dimension (N-1) On entry, the (n-1) subdiagonal elements of the tridiagonal matrix. On exit, E has been destroyed. @@ -97939,7 +32684,7 @@ L190: if (*n < 0) { *info = -1; i__1 = -(*info); - xerbla_("SSTERF", &i__1); + xerbla_("DSTERF", &i__1); return 0; } if (*n <= 1) { @@ -97948,19 +32693,19 @@ L190: /* Determine the unit roundoff for this environment. */ - eps = slamch_("E"); + eps = EPSILON; /* Computing 2nd power */ - r__1 = eps; - eps2 = r__1 * r__1; - safmin = slamch_("S"); - safmax = 1.f / safmin; - ssfmax = sqrt(safmax) / 3.f; + d__1 = eps; + eps2 = d__1 * d__1; + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; ssfmin = sqrt(safmin) / eps2; /* Compute the eigenvalues of the tridiagonal matrix. 
*/ nmaxit = *n * 30; - sigma = 0.f; + sigma = 0.; jtot = 0; /* @@ -97976,13 +32721,13 @@ L10: goto L170; } if (l1 > 1) { - e[l1 - 1] = 0.f; + e[l1 - 1] = 0.; } i__1 = *n - 1; for (m = l1; m <= i__1; ++m) { - if ((r__3 = e[m], dabs(r__3)) <= sqrt((r__1 = d__[m], dabs(r__1))) * - sqrt((r__2 = d__[m + 1], dabs(r__2))) * eps) { - e[m] = 0.f; + if ((d__3 = e[m], abs(d__3)) <= sqrt((d__1 = d__[m], abs(d__1))) * + sqrt((d__2 = d__[m + 1], abs(d__2))) * eps) { + e[m] = 0.; goto L30; } /* L20: */ @@ -98002,37 +32747,37 @@ L30: /* Scale submatrix in rows and columns L to LEND */ i__1 = lend - l + 1; - anorm = slanst_("I", &i__1, &d__[l], &e[l]); + anorm = dlanst_("I", &i__1, &d__[l], &e[l]); iscale = 0; if (anorm > ssfmax) { iscale = 1; i__1 = lend - l + 1; - slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, info); i__1 = lend - l; - slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, info); } else if (anorm < ssfmin) { iscale = 2; i__1 = lend - l + 1; - slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, info); i__1 = lend - l; - slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, info); } i__1 = lend - 1; for (i__ = l; i__ <= i__1; ++i__) { /* Computing 2nd power */ - r__1 = e[i__]; - e[i__] = r__1 * r__1; + d__1 = e[i__]; + e[i__] = d__1 * d__1; /* L40: */ } /* Choose between QL and QR iteration */ - if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { lend = lsv; l = lendsv; } @@ -98049,8 +32794,8 @@ L50: if (l != lend) { i__1 = lend - 1; for (m = l; m <= i__1; ++m) { - if ((r__2 = e[m], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[ - m + 1], 
dabs(r__1))) { + if ((d__2 = e[m], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m + + 1], abs(d__1))) { goto L70; } /* L60: */ @@ -98060,7 +32805,7 @@ L50: L70: if (m < lend) { - e[m] = 0.f; + e[m] = 0.; } p = d__[l]; if (m == l) { @@ -98068,16 +32813,16 @@ L70: } /* - If remaining matrix is 2 by 2, use SLAE2 to compute its + If remaining matrix is 2 by 2, use DLAE2 to compute its eigenvalues. */ if (m == l + 1) { rte = sqrt(e[l]); - slae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); + dlae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); d__[l] = rt1; d__[l + 1] = rt2; - e[l] = 0.f; + e[l] = 0.; l += 2; if (l <= lend) { goto L50; @@ -98093,12 +32838,12 @@ L70: /* Form shift. */ rte = sqrt(e[l]); - sigma = (d__[l + 1] - p) / (rte * 2.f); - r__ = slapy2_(&sigma, &c_b1011); - sigma = p - rte / (sigma + r_sign(&r__, &sigma)); + sigma = (d__[l + 1] - p) / (rte * 2.); + r__ = dlapy2_(&sigma, &c_b15); + sigma = p - rte / (sigma + d_sign(&r__, &sigma)); - c__ = 1.f; - s = 0.f; + c__ = 1.; + s = 0.; gamma = d__[m] - sigma; p = gamma * gamma; @@ -98118,7 +32863,7 @@ L70: alpha = d__[i__]; gamma = c__ * (alpha - sigma) - s * oldgam; d__[i__ + 1] = oldgam + (alpha - gamma); - if (c__ != 0.f) { + if (c__ != 0.) { p = gamma * gamma / c__; } else { p = oldc * bb; @@ -98152,8 +32897,8 @@ L90: L100: i__1 = lend + 1; for (m = l; m >= i__1; --m) { - if ((r__2 = e[m - 1], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[ - m - 1], dabs(r__1))) { + if ((d__2 = e[m - 1], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m + - 1], abs(d__1))) { goto L120; } /* L110: */ @@ -98162,7 +32907,7 @@ L100: L120: if (m > lend) { - e[m - 1] = 0.f; + e[m - 1] = 0.; } p = d__[l]; if (m == l) { @@ -98170,16 +32915,16 @@ L120: } /* - If remaining matrix is 2 by 2, use SLAE2 to compute its + If remaining matrix is 2 by 2, use DLAE2 to compute its eigenvalues. 
*/ if (m == l - 1) { rte = sqrt(e[l - 1]); - slae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); + dlae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); d__[l] = rt1; d__[l - 1] = rt2; - e[l - 1] = 0.f; + e[l - 1] = 0.; l += -2; if (l >= lend) { goto L100; @@ -98195,12 +32940,12 @@ L120: /* Form shift. */ rte = sqrt(e[l - 1]); - sigma = (d__[l - 1] - p) / (rte * 2.f); - r__ = slapy2_(&sigma, &c_b1011); - sigma = p - rte / (sigma + r_sign(&r__, &sigma)); + sigma = (d__[l - 1] - p) / (rte * 2.); + r__ = dlapy2_(&sigma, &c_b15); + sigma = p - rte / (sigma + d_sign(&r__, &sigma)); - c__ = 1.f; - s = 0.f; + c__ = 1.; + s = 0.; gamma = d__[m] - sigma; p = gamma * gamma; @@ -98220,7 +32965,7 @@ L120: alpha = d__[i__ + 1]; gamma = c__ * (alpha - sigma) - s * oldgam; d__[i__] = oldgam + (alpha - gamma); - if (c__ != 0.f) { + if (c__ != 0.) { p = gamma * gamma / c__; } else { p = oldc * bb; @@ -98250,12 +32995,12 @@ L140: L150: if (iscale == 1) { i__1 = lendsv - lsv + 1; - slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], n, info); } if (iscale == 2) { i__1 = lendsv - lsv + 1; - slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], n, info); } @@ -98269,7 +33014,7 @@ L150: } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.f) { + if (e[i__] != 0.) { ++(*info); } /* L160: */ @@ -98279,61 +33024,64 @@ L150: /* Sort eigenvalues in increasing order. 
*/ L170: - slasrt_("I", n, &d__[1], info); + dlasrt_("I", n, &d__[1], info); L180: return 0; -/* End of SSTERF */ +/* End of DSTERF */ -} /* ssterf_ */ +} /* dsterf_ */ -/* Subroutine */ int ssyevd_(char *jobz, char *uplo, integer *n, real *a, - integer *lda, real *w, real *work, integer *lwork, integer *iwork, - integer *liwork, integer *info) +/* Subroutine */ int dsyevd_(char *jobz, char *uplo, integer *n, doublereal * + a, integer *lda, doublereal *w, doublereal *work, integer *lwork, + integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; - real r__1; + doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static real eps; + static doublereal eps; static integer inde; - static real anrm, rmin, rmax; + static doublereal anrm, rmin, rmax; static integer lopt; - static real sigma; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal sigma; extern logical lsame_(char *, char *); - static integer iinfo; - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static integer lwmin, liopt; + static integer iinfo, lwmin, liopt; static logical lower, wantz; - static integer indwk2, llwrk2, iscale; - extern doublereal slamch_(char *); - static real safmin; + static integer indwk2, llwrk2; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dstedc_(char *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *), dlacpy_( + char *, integer *, integer *, doublereal *, integer *, doublereal + *, integer *); + static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *); - static real bignum; - extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, - real 
*, integer *, integer *, real *, integer *, integer *); + static doublereal bignum; static integer indtau; - extern /* Subroutine */ int sstedc_(char *, integer *, real *, real *, - real *, integer *, real *, integer *, integer *, integer *, - integer *), slacpy_(char *, integer *, integer *, real *, - integer *, real *, integer *); + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *); + extern doublereal dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *); static integer indwrk, liwmin; - extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); - extern doublereal slansy_(char *, char *, integer *, real *, integer *, - real *); + extern /* Subroutine */ int dormtr_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), dsytrd_(char *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *); static integer llwork; - static real smlnum; + static doublereal smlnum; static logical lquery; - extern /* Subroutine */ int sormtr_(char *, char *, char *, integer *, - integer *, real *, integer *, real *, real *, integer *, real *, - integer *, integer *), ssytrd_(char *, - integer *, real *, integer *, real *, real *, real *, real *, - integer *, integer *); /* @@ -98346,7 +33094,7 @@ L180: Purpose ======= - SSYEVD computes all eigenvalues and, optionally, eigenvectors of a + DSYEVD computes all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. If eigenvectors are desired, it uses a divide and conquer algorithm. @@ -98357,8 +33105,8 @@ L180: Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. - Because of large use of BLAS of level 3, SSYEVD needs N**2 more - workspace than SSYEVX. 
+ Because of large use of BLAS of level 3, DSYEVD needs N**2 more + workspace than DSYEVX. Arguments ========= @@ -98374,7 +33122,7 @@ L180: N (input) INTEGER The order of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA, N) + A (input/output) DOUBLE PRECISION array, dimension (LDA, N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If UPLO = 'L', @@ -98389,10 +33137,10 @@ L180: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). - W (output) REAL array, dimension (N) + W (output) DOUBLE PRECISION array, dimension (N) If INFO = 0, the eigenvalues in ascending order. - WORK (workspace/output) REAL array, + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. @@ -98454,7 +33202,7 @@ L180: /* Function Body */ wantz = lsame_(jobz, "V"); lower = lsame_(uplo, "L"); - lquery = (*lwork == -1) || (*liwork == -1); + lquery = *lwork == -1 || *liwork == -1; *info = 0; if (*n <= 1) { @@ -98467,17 +33215,17 @@ L180: liwmin = *n * 5 + 3; /* Computing 2nd power */ i__1 = *n; - lwmin = *n * 6 + 1 + ((i__1 * i__1) << (1)); + lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); } else { liwmin = 1; - lwmin = ((*n) << (1)) + 1; + lwmin = (*n << 1) + 1; } lopt = lwmin; liopt = liwmin; } - if (! ((wantz) || (lsame_(jobz, "N")))) { + if (! (wantz || lsame_(jobz, "N"))) { *info = -1; - } else if (! ((lower) || (lsame_(uplo, "U")))) { + } else if (! 
(lower || lsame_(uplo, "U"))) { *info = -2; } else if (*n < 0) { *info = -3; @@ -98490,13 +33238,13 @@ L180: } if (*info == 0) { - work[1] = (real) lopt; + work[1] = (doublereal) lopt; iwork[1] = liopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("SSYEVD", &i__1); + xerbla_("DSYEVD", &i__1); return 0; } else if (lquery) { return 0; @@ -98511,25 +33259,25 @@ L180: if (*n == 1) { w[1] = a[a_dim1 + 1]; if (wantz) { - a[a_dim1 + 1] = 1.f; + a[a_dim1 + 1] = 1.; } return 0; } /* Get machine constants. */ - safmin = slamch_("Safe minimum"); - eps = slamch_("Precision"); + safmin = SAFEMINIMUM; + eps = PRECISION; smlnum = safmin / eps; - bignum = 1.f / smlnum; + bignum = 1. / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ - anrm = slansy_("M", uplo, n, &a[a_offset], lda, &work[1]); + anrm = dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); iscale = 0; - if (anrm > 0.f && anrm < rmin) { + if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { @@ -98537,11 +33285,11 @@ L180: sigma = rmax / anrm; } if (iscale == 1) { - slascl_(uplo, &c__0, &c__0, &c_b1011, &sigma, n, n, &a[a_offset], lda, - info); + dlascl_(uplo, &c__0, &c__0, &c_b15, &sigma, n, n, &a[a_offset], lda, + info); } -/* Call SSYTRD to reduce symmetric matrix to tridiagonal form. */ +/* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ inde = 1; indtau = inde + *n; @@ -98550,70 +33298,74 @@ L180: indwk2 = indwrk + *n * *n; llwrk2 = *lwork - indwk2 + 1; - ssytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & + dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & work[indwrk], &llwork, &iinfo); - lopt = ((*n) << (1)) + work[indwrk]; + lopt = (integer) ((*n << 1) + work[indwrk]); /* - For eigenvalues only, call SSTERF. 
For eigenvectors, first call - SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the - tridiagonal matrix, then call SORMTR to multiply it by the + For eigenvalues only, call DSTERF. For eigenvectors, first call + DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the + tridiagonal matrix, then call DORMTR to multiply it by the Householder transformations stored in A. */ if (! wantz) { - ssterf_(n, &w[1], &work[inde], info); + dsterf_(n, &w[1], &work[inde], info); } else { - sstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & + dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & llwrk2, &iwork[1], liwork, info); - sormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + dormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ indwrk], n, &work[indwk2], &llwrk2, &iinfo); - slacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); + dlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); /* Computing MAX Computing 2nd power */ i__3 = *n; - i__1 = lopt, i__2 = *n * 6 + 1 + ((i__3 * i__3) << (1)); + i__1 = lopt, i__2 = *n * 6 + 1 + (i__3 * i__3 << 1); lopt = max(i__1,i__2); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { - r__1 = 1.f / sigma; - sscal_(n, &r__1, &w[1], &c__1); + d__1 = 1. 
/ sigma; + dscal_(n, &d__1, &w[1], &c__1); } - work[1] = (real) lopt; + work[1] = (doublereal) lopt; iwork[1] = liopt; return 0; -/* End of SSYEVD */ +/* End of DSYEVD */ -} /* ssyevd_ */ +} /* dsyevd_ */ -/* Subroutine */ int ssytd2_(char *uplo, integer *n, real *a, integer *lda, - real *d__, real *e, real *tau, integer *info) +/* Subroutine */ int dsytd2_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i__; - static real taui; - extern doublereal sdot_(integer *, real *, integer *, real *, integer *); - extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *, - integer *, real *, integer *, real *, integer *); - static real alpha; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal taui; + extern /* Subroutine */ int dsyr2_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal alpha; extern logical lsame_(char *, char *); + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); static logical upper; - extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, - real *, integer *), ssymv_(char *, integer *, real *, real *, - integer *, real *, integer *, real *, real *, integer *), - xerbla_(char *, integer *), slarfg_(integer *, real *, - real *, integer *, real *); + extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer * + ); /* @@ -98626,7 +33378,7 @@ L180: Purpose ======= - SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal + DSYTD2 reduces a 
real symmetric matrix A to symmetric tridiagonal form T by an orthogonal similarity transformation: Q' * A * Q = T. Arguments @@ -98641,7 +33393,7 @@ L180: N (input) INTEGER The order of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading n-by-n upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower @@ -98663,15 +33415,15 @@ L180: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). - D (output) REAL array, dimension (N) + D (output) DOUBLE PRECISION array, dimension (N) The diagonal elements of the tridiagonal matrix T: D(i) = A(i,i). - E (output) REAL array, dimension (N-1) + E (output) DOUBLE PRECISION array, dimension (N-1) The off-diagonal elements of the tridiagonal matrix T: E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - TAU (output) REAL array, dimension (N-1) + TAU (output) DOUBLE PRECISION array, dimension (N-1) The scalar factors of the elementary reflectors (see Further Details). @@ -98748,7 +33500,7 @@ L180: } if (*info != 0) { i__1 = -(*info); - xerbla_("SSYTD2", &i__1); + xerbla_("DSYTD2", &i__1); return 0; } @@ -98769,26 +33521,27 @@ L180: to annihilate A(1:i-1,i+1) */ - slarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 + dlarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); e[i__] = a[i__ + (i__ + 1) * a_dim1]; - if (taui != 0.f) { + if (taui != 0.) 
{ /* Apply H(i) from both sides to A(1:i,1:i) */ - a[i__ + (i__ + 1) * a_dim1] = 1.f; + a[i__ + (i__ + 1) * a_dim1] = 1.; /* Compute x := tau * A * v storing x in TAU(1:i) */ - ssymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * - a_dim1 + 1], &c__1, &c_b320, &tau[1], &c__1); + dsymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b29, &tau[1], &c__1) + ; /* Compute w := x - 1/2 * tau * (x'*v) * v */ - alpha = taui * -.5f * sdot_(&i__, &tau[1], &c__1, &a[(i__ + 1) - * a_dim1 + 1], &c__1); - saxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + alpha = taui * -.5 * ddot_(&i__, &tau[1], &c__1, &a[(i__ + 1) + * a_dim1 + 1], &c__1); + daxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ 1], &c__1); /* @@ -98796,8 +33549,8 @@ L180: A := A - v * w' - w * v' */ - ssyr2_(uplo, &i__, &c_b1290, &a[(i__ + 1) * a_dim1 + 1], & - c__1, &tau[1], &c__1, &a[a_offset], lda); + dsyr2_(uplo, &i__, &c_b151, &a[(i__ + 1) * a_dim1 + 1], &c__1, + &tau[1], &c__1, &a[a_offset], lda); a[i__ + (i__ + 1) * a_dim1] = e[i__]; } @@ -98821,30 +33574,30 @@ L180: i__2 = *n - i__; /* Computing MIN */ i__3 = i__ + 2; - slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * a_dim1], &c__1, &taui); e[i__] = a[i__ + 1 + i__ * a_dim1]; - if (taui != 0.f) { + if (taui != 0.) 
{ /* Apply H(i) from both sides to A(i+1:n,i+1:n) */ - a[i__ + 1 + i__ * a_dim1] = 1.f; + a[i__ + 1 + i__ * a_dim1] = 1.; /* Compute x := tau * A * v storing y in TAU(i:n-1) */ i__2 = *n - i__; - ssymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b320, &tau[ + dsymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &tau[ i__], &c__1); /* Compute w := x - 1/2 * tau * (x'*v) * v */ i__2 = *n - i__; - alpha = taui * -.5f * sdot_(&i__2, &tau[i__], &c__1, &a[i__ + + alpha = taui * -.5 * ddot_(&i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); i__2 = *n - i__; - saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ i__], &c__1); /* @@ -98853,7 +33606,7 @@ L180: */ i__2 = *n - i__; - ssyr2_(uplo, &i__2, &c_b1290, &a[i__ + 1 + i__ * a_dim1], & + dsyr2_(uplo, &i__2, &c_b151, &a[i__ + 1 + i__ * a_dim1], & c__1, &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], lda); @@ -98868,13 +33621,13 @@ L180: return 0; -/* End of SSYTD2 */ +/* End of DSYTD2 */ -} /* ssytd2_ */ +} /* dsytd2_ */ -/* Subroutine */ int ssytrd_(char *uplo, integer *n, real *a, integer *lda, - real *d__, real *e, real *tau, real *work, integer *lwork, integer * - info) +/* Subroutine */ int dsytrd_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * + work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; @@ -98884,15 +33637,15 @@ L180: extern logical lsame_(char *, char *); static integer nbmin, iinfo; static logical upper; - extern /* Subroutine */ int ssytd2_(char *, integer *, real *, integer *, - real *, real *, real *, integer *), ssyr2k_(char *, char * - , integer *, integer *, real *, real *, integer *, real *, - integer *, real *, real *, integer *), xerbla_( - char *, integer *); + 
extern /* Subroutine */ int dsytd2_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dsyr2k_(char *, char *, integer *, integer *, doublereal + *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlatrd_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, + integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slatrd_(char *, integer *, integer *, real *, - integer *, real *, real *, real *, integer *); static integer ldwork, lwkopt; static logical lquery; @@ -98907,7 +33660,7 @@ L180: Purpose ======= - SSYTRD reduces a real symmetric matrix A to real symmetric + DSYTRD reduces a real symmetric matrix A to real symmetric tridiagonal form T by an orthogonal similarity transformation: Q**T * A * Q = T. @@ -98921,7 +33674,7 @@ L180: N (input) INTEGER The order of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,N) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower @@ -98943,19 +33696,19 @@ L180: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). - D (output) REAL array, dimension (N) + D (output) DOUBLE PRECISION array, dimension (N) The diagonal elements of the tridiagonal matrix T: D(i) = A(i,i). - E (output) REAL array, dimension (N-1) + E (output) DOUBLE PRECISION array, dimension (N-1) The off-diagonal elements of the tridiagonal matrix T: E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - TAU (output) REAL array, dimension (N-1) + TAU (output) DOUBLE PRECISION array, dimension (N-1) The scalar factors of the elementary reflectors (see Further Details). 
- WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -99048,15 +33801,15 @@ L180: /* Determine the block size. */ - nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + nb = ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); lwkopt = *n * nb; - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("SSYTRD", &i__1); + xerbla_("DSYTRD", &i__1); return 0; } else if (lquery) { return 0; @@ -99065,7 +33818,7 @@ L180: /* Quick return if possible */ if (*n == 0) { - work[1] = 1.f; + work[1] = 1.; return 0; } @@ -99079,7 +33832,7 @@ L180: Computing MAX */ - i__1 = nb, i__2 = ilaenv_(&c__3, "SSYTRD", uplo, n, &c_n1, &c_n1, & + i__1 = nb, i__2 = ilaenv_(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, & c_n1, (ftnlen)6, (ftnlen)1); nx = max(i__1,i__2); if (nx < *n) { @@ -99099,7 +33852,7 @@ L180: */ i__1 = *lwork / ldwork; nb = max(i__1,1); - nbmin = ilaenv_(&c__2, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, + nbmin = ilaenv_(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); if (nb < nbmin) { nx = *n; @@ -99132,7 +33885,7 @@ L180: */ i__3 = i__ + nb - 1; - slatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + dlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & work[1], &ldwork); /* @@ -99141,9 +33894,9 @@ L180: */ i__3 = i__ - 1; - ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b1290, &a[i__ * - a_dim1 + 1], lda, &work[1], &ldwork, &c_b1011, &a[ - a_offset], lda); + dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b151, &a[i__ * + a_dim1 + 1], lda, &work[1], &ldwork, &c_b15, &a[a_offset], + lda); /* Copy superdiagonal elements back into A, and diagonal @@ -99161,7 +33914,7 @@ L180: /* Use unblocked code to reduce the last or only block */ - ssytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], 
&iinfo); + dsytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); } else { /* Reduce the lower triangle of A */ @@ -99177,7 +33930,7 @@ L180: */ i__3 = *n - i__ + 1; - slatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + dlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & tau[i__], &work[1], &ldwork); /* @@ -99186,8 +33939,8 @@ L180: */ i__3 = *n - i__ - nb + 1; - ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b1290, &a[i__ + nb + - i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b1011, &a[ + dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b151, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b15, &a[ i__ + nb + (i__ + nb) * a_dim1], lda); /* @@ -99207,65 +33960,71 @@ L180: /* Use unblocked code to reduce the last or only block */ i__1 = *n - i__ + 1; - ssytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + dsytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &tau[i__], &iinfo); } - work[1] = (real) lwkopt; + work[1] = (doublereal) lwkopt; return 0; -/* End of SSYTRD */ +/* End of DSYTRD */ -} /* ssytrd_ */ +} /* dsytrd_ */ -/* Subroutine */ int strevc_(char *side, char *howmny, logical *select, - integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, - integer *ldvr, integer *mm, integer *m, real *work, integer *info) +/* Subroutine */ int dtrevc_(char *side, char *howmny, logical *select, + integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * + ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, + doublereal *work, integer *info) { /* System generated locals */ integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3; - real r__1, r__2, r__3, r__4; + doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer i__, j, k; - static real x[4] /* was [2][2] */; + static doublereal x[4] /* was [2][2] */; static integer j1, j2, n2, ii, ki, ip, is; - static 
real wi, wr, rec, ulp, beta, emax; - static logical pair, allv; + static doublereal wi, wr, rec, ulp, beta, emax; + static logical pair; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static logical allv; static integer ierr; - static real unfl, ovfl, smin; - extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + static doublereal unfl, ovfl, smin; static logical over; - static real vmax; + static doublereal vmax; static integer jnxt; - static real scale; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal scale; extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static real remax; - static logical leftv; - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *); - static logical bothv; - static real vcrit; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static doublereal remax; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static logical leftv, bothv; + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + static doublereal vcrit; static logical somev; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - static real xnorm; - extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, - real *, integer *), slaln2_(logical *, integer *, integer *, real - *, real *, real *, integer *, real *, real *, real *, integer *, - real *, real *, real *, integer *, real *, real *, integer *), - slabad_(real *, real *); - extern doublereal slamch_(char *); + static doublereal xnorm; + extern /* Subroutine */ int 
dlaln2_(logical *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal * + , doublereal *, integer *, doublereal *, doublereal *, integer *), + dlabad_(doublereal *, doublereal *); + + extern integer idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int xerbla_(char *, integer *); - static real bignum; - extern integer isamax_(integer *, real *, integer *); + static doublereal bignum; static logical rightv; - static real smlnum; + static doublereal smlnum; /* @@ -99278,7 +34037,7 @@ L180: Purpose ======= - STREVC computes some or all of the right and/or left eigenvectors of + DTREVC computes some or all of the right and/or left eigenvectors of a real upper quasi-triangular matrix T. The right eigenvector x and the left eigenvector y of T corresponding @@ -99295,7 +34054,7 @@ L180: original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of right or left eigenvectors of A. - T must be in Schur canonical form (as returned by SHSEQR), that is, + T must be in Schur canonical form (as returned by DHSEQR), that is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block has its diagonal elements equal and its off-diagonal elements of opposite sign. Corresponding to each 2-by-2 @@ -99333,16 +34092,16 @@ L180: N (input) INTEGER The order of the matrix T. N >= 0. - T (input) REAL array, dimension (LDT,N) + T (input) DOUBLE PRECISION array, dimension (LDT,N) The upper quasi-triangular matrix T in Schur canonical form. LDT (input) INTEGER The leading dimension of the array T. LDT >= max(1,N). - VL (input/output) REAL array, dimension (LDVL,MM) + VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must contain an N-by-N matrix Q (usually the orthogonal matrix Q - of Schur vectors returned by SHSEQR). + of Schur vectors returned by DHSEQR). 
On exit, if SIDE = 'L' or 'B', VL contains: if HOWMNY = 'A', the matrix Y of left eigenvectors of T; VL has the same quasi-lower triangular form @@ -99368,10 +34127,10 @@ L180: The leading dimension of the array VL. LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. - VR (input/output) REAL array, dimension (LDVR,MM) + VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must contain an N-by-N matrix Q (usually the orthogonal matrix Q - of Schur vectors returned by SHSEQR). + of Schur vectors returned by DHSEQR). On exit, if SIDE = 'R' or 'B', VR contains: if HOWMNY = 'A', the matrix X of right eigenvectors of T; VR has the same quasi-upper triangular form @@ -99407,7 +34166,7 @@ L180: Each selected real eigenvector occupies one column and each selected complex eigenvector occupies two columns. - WORK (workspace) REAL array, dimension (3*N) + WORK (workspace) DOUBLE PRECISION array, dimension (3*N) INFO (output) INTEGER = 0: successful exit @@ -99417,7 +34176,7 @@ L180: =============== The algorithm used in this program is basically backward (forward) - substitution, with scaling to make the code robust against + substitution, with scaling to make the the code robust against possible overflow. 
Each eigenvector is normalized so that the element of largest @@ -99445,8 +34204,8 @@ L180: /* Function Body */ bothv = lsame_(side, "B"); - rightv = (lsame_(side, "R")) || (bothv); - leftv = (lsame_(side, "L")) || (bothv); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; allv = lsame_(howmny, "A"); over = lsame_(howmny, "B"); @@ -99461,9 +34220,9 @@ L180: *info = -4; } else if (*ldt < max(1,*n)) { *info = -6; - } else if ((*ldvl < 1) || (leftv && *ldvl < *n)) { + } else if (*ldvl < 1 || leftv && *ldvl < *n) { *info = -8; - } else if ((*ldvr < 1) || (rightv && *ldvr < *n)) { + } else if (*ldvr < 1 || rightv && *ldvr < *n) { *info = -10; } else { @@ -99483,13 +34242,13 @@ L180: select[j] = FALSE_; } else { if (j < *n) { - if (t[j + 1 + j * t_dim1] == 0.f) { + if (t[j + 1 + j * t_dim1] == 0.) { if (select[j]) { ++(*m); } } else { pair = TRUE_; - if ((select[j]) || (select[j + 1])) { + if (select[j] || select[j + 1]) { select[j] = TRUE_; *m += 2; } @@ -99512,7 +34271,7 @@ L180: } if (*info != 0) { i__1 = -(*info); - xerbla_("STREVC", &i__1); + xerbla_("DTREVC", &i__1); return 0; } @@ -99524,25 +34283,25 @@ L180: /* Set the constants to control overflow. */ - unfl = slamch_("Safe minimum"); - ovfl = 1.f / unfl; - slabad_(&unfl, &ovfl); - ulp = slamch_("Precision"); + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; smlnum = unfl * (*n / ulp); - bignum = (1.f - ulp) / smlnum; + bignum = (1. - ulp) / smlnum; /* Compute 1-norm of each column of strictly upper triangular part of T to control overflow in triangular solver. 
*/ - work[1] = 0.f; + work[1] = 0.; i__1 = *n; for (j = 2; j <= i__1; ++j) { - work[j] = 0.f; + work[j] = 0.; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - work[j] += (r__1 = t[i__ + j * t_dim1], dabs(r__1)); + work[j] += (d__1 = t[i__ + j * t_dim1], abs(d__1)); /* L20: */ } /* L30: */ @@ -99555,7 +34314,7 @@ L180: -1, second of conjugate complex pair: (wr,wi) */ - n2 = (*n) << (1); + n2 = *n << 1; if (rightv) { @@ -99571,7 +34330,7 @@ L180: if (ki == 1) { goto L40; } - if (t[ki + (ki - 1) * t_dim1] == 0.f) { + if (t[ki + (ki - 1) * t_dim1] == 0.) { goto L40; } ip = -1; @@ -99592,20 +34351,20 @@ L40: /* Compute the KI-th eigenvalue (WR,WI). */ wr = t[ki + ki * t_dim1]; - wi = 0.f; + wi = 0.; if (ip != 0) { - wi = sqrt((r__1 = t[ki + (ki - 1) * t_dim1], dabs(r__1))) * - sqrt((r__2 = t[ki - 1 + ki * t_dim1], dabs(r__2))); + wi = sqrt((d__1 = t[ki + (ki - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki - 1 + ki * t_dim1], abs(d__2))); } /* Computing MAX */ - r__1 = ulp * (dabs(wr) + dabs(wi)); - smin = dmax(r__1,smlnum); + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1,smlnum); if (ip == 0) { /* Real right eigenvector */ - work[ki + *n] = 1.f; + work[ki + *n] = 1.; /* Form right-hand side */ @@ -99629,7 +34388,7 @@ L40: j2 = j; jnxt = j - 1; if (j > 1) { - if (t[j + (j - 1) * t_dim1] != 0.f) { + if (t[j + (j - 1) * t_dim1] != 0.) { j1 = j - 1; jnxt = j - 2; } @@ -99639,17 +34398,17 @@ L40: /* 1-by-1 diagonal block */ - slaln2_(&c_false, &c__1, &c__1, &smin, &c_b1011, &t[j - + j * t_dim1], ldt, &c_b1011, &c_b1011, &work[ - j + *n], n, &wr, &c_b320, x, &c__2, &scale, & - xnorm, &ierr); + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); /* Scale X(1,1) to avoid overflow when updating the right-hand side. */ - if (xnorm > 1.f) { + if (xnorm > 1.) 
{ if (work[j] > bignum / xnorm) { x[0] /= xnorm; scale /= xnorm; @@ -99658,36 +34417,36 @@ L40: /* Scale if necessary */ - if (scale != 1.f) { - sscal_(&ki, &scale, &work[*n + 1], &c__1); + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); } work[j + *n] = x[0]; /* Update right-hand side */ i__1 = j - 1; - r__1 = -x[0]; - saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ *n + 1], &c__1); } else { /* 2-by-2 diagonal block */ - slaln2_(&c_false, &c__2, &c__1, &smin, &c_b1011, &t[j - - 1 + (j - 1) * t_dim1], ldt, &c_b1011, & - c_b1011, &work[j - 1 + *n], n, &wr, &c_b320, - x, &c__2, &scale, &xnorm, &ierr); + dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b15, &t[j - + 1 + (j - 1) * t_dim1], ldt, &c_b15, &c_b15, & + work[j - 1 + *n], n, &wr, &c_b29, x, &c__2, & + scale, &xnorm, &ierr); /* Scale X(1,1) and X(2,1) to avoid overflow when updating the right-hand side. */ - if (xnorm > 1.f) { + if (xnorm > 1.) { /* Computing MAX */ - r__1 = work[j - 1], r__2 = work[j]; - beta = dmax(r__1,r__2); + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1,d__2); if (beta > bignum / xnorm) { x[0] /= xnorm; x[1] /= xnorm; @@ -99697,8 +34456,8 @@ L40: /* Scale if necessary */ - if (scale != 1.f) { - sscal_(&ki, &scale, &work[*n + 1], &c__1); + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); } work[j - 1 + *n] = x[0]; work[j + *n] = x[1]; @@ -99706,12 +34465,12 @@ L40: /* Update right-hand side */ i__1 = j - 2; - r__1 = -x[0]; - saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, &work[*n + 1], &c__1); i__1 = j - 2; - r__1 = -x[1]; - saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + d__1 = -x[1]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ *n + 1], &c__1); } L60: @@ -99721,29 +34480,29 @@ L60: /* Copy the vector x or Q*x to VR and normalize. */ if (! 
over) { - scopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & + dcopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & c__1); - ii = isamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); - remax = 1.f / (r__1 = vr[ii + is * vr_dim1], dabs(r__1)); - sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + ii = idamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + is * vr_dim1], abs(d__1)); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); i__1 = *n; for (k = ki + 1; k <= i__1; ++k) { - vr[k + is * vr_dim1] = 0.f; + vr[k + is * vr_dim1] = 0.; /* L70: */ } } else { if (ki > 1) { i__1 = ki - 1; - sgemv_("N", n, &i__1, &c_b1011, &vr[vr_offset], ldvr, - &work[*n + 1], &c__1, &work[ki + *n], &vr[ki * - vr_dim1 + 1], &c__1); + dgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki + *n], &vr[ki * + vr_dim1 + 1], &c__1); } - ii = isamax_(n, &vr[ki * vr_dim1 + 1], &c__1); - remax = 1.f / (r__1 = vr[ii + ki * vr_dim1], dabs(r__1)); - sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + ii = idamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + ki * vr_dim1], abs(d__1)); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); } } else { @@ -99756,16 +34515,16 @@ L60: [ (T(KI,KI-1) T(KI,KI) ) ] */ - if ((r__1 = t[ki - 1 + ki * t_dim1], dabs(r__1)) >= (r__2 = t[ - ki + (ki - 1) * t_dim1], dabs(r__2))) { - work[ki - 1 + *n] = 1.f; + if ((d__1 = t[ki - 1 + ki * t_dim1], abs(d__1)) >= (d__2 = t[ + ki + (ki - 1) * t_dim1], abs(d__2))) { + work[ki - 1 + *n] = 1.; work[ki + n2] = wi / t[ki - 1 + ki * t_dim1]; } else { work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1]; - work[ki + n2] = 1.f; + work[ki + n2] = 1.; } - work[ki + *n] = 0.f; - work[ki - 1 + n2] = 0.f; + work[ki + *n] = 0.; + work[ki - 1 + n2] = 0.; /* Form right-hand side */ @@ -99791,7 +34550,7 @@ L60: j2 = j; jnxt = j - 1; if (j > 1) { - if (t[j + (j - 1) * t_dim1] != 0.f) { + if (t[j + (j - 1) * t_dim1] != 0.) 
{ j1 = j - 1; jnxt = j - 2; } @@ -99801,17 +34560,17 @@ L60: /* 1-by-1 diagonal block */ - slaln2_(&c_false, &c__1, &c__2, &smin, &c_b1011, &t[j - + j * t_dim1], ldt, &c_b1011, &c_b1011, &work[ - j + *n], n, &wr, &wi, x, &c__2, &scale, & - xnorm, &ierr); + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &wi, x, &c__2, &scale, &xnorm, & + ierr); /* Scale X(1,1) and X(1,2) to avoid overflow when updating the right-hand side. */ - if (xnorm > 1.f) { + if (xnorm > 1.) { if (work[j] > bignum / xnorm) { x[0] /= xnorm; x[2] /= xnorm; @@ -99821,9 +34580,9 @@ L60: /* Scale if necessary */ - if (scale != 1.f) { - sscal_(&ki, &scale, &work[*n + 1], &c__1); - sscal_(&ki, &scale, &work[n2 + 1], &c__1); + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + dscal_(&ki, &scale, &work[n2 + 1], &c__1); } work[j + *n] = x[0]; work[j + n2] = x[2]; @@ -99831,34 +34590,34 @@ L60: /* Update the right-hand side */ i__1 = j - 1; - r__1 = -x[0]; - saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ *n + 1], &c__1); i__1 = j - 1; - r__1 = -x[2]; - saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + d__1 = -x[2]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ n2 + 1], &c__1); } else { /* 2-by-2 diagonal block */ - slaln2_(&c_false, &c__2, &c__2, &smin, &c_b1011, &t[j - - 1 + (j - 1) * t_dim1], ldt, &c_b1011, & - c_b1011, &work[j - 1 + *n], n, &wr, &wi, x, & - c__2, &scale, &xnorm, &ierr); + dlaln2_(&c_false, &c__2, &c__2, &smin, &c_b15, &t[j - + 1 + (j - 1) * t_dim1], ldt, &c_b15, &c_b15, & + work[j - 1 + *n], n, &wr, &wi, x, &c__2, & + scale, &xnorm, &ierr); /* Scale X to avoid overflow when updating the right-hand side. */ - if (xnorm > 1.f) { + if (xnorm > 1.) 
{ /* Computing MAX */ - r__1 = work[j - 1], r__2 = work[j]; - beta = dmax(r__1,r__2); + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1,d__2); if (beta > bignum / xnorm) { - rec = 1.f / xnorm; + rec = 1. / xnorm; x[0] *= rec; x[2] *= rec; x[1] *= rec; @@ -99869,9 +34628,9 @@ L60: /* Scale if necessary */ - if (scale != 1.f) { - sscal_(&ki, &scale, &work[*n + 1], &c__1); - sscal_(&ki, &scale, &work[n2 + 1], &c__1); + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + dscal_(&ki, &scale, &work[n2 + 1], &c__1); } work[j - 1 + *n] = x[0]; work[j + *n] = x[1]; @@ -99881,20 +34640,20 @@ L60: /* Update the right-hand side */ i__1 = j - 2; - r__1 = -x[0]; - saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, &work[*n + 1], &c__1); i__1 = j - 2; - r__1 = -x[1]; - saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + d__1 = -x[1]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ *n + 1], &c__1); i__1 = j - 2; - r__1 = -x[2]; - saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, + d__1 = -x[2]; + daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, &work[n2 + 1], &c__1); i__1 = j - 2; - r__1 = -x[3]; - saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + d__1 = -x[3]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ n2 + 1], &c__1); } L90: @@ -99904,30 +34663,30 @@ L90: /* Copy the vector x or Q*x to VR and normalize. */ if (! 
over) { - scopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 + dcopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 + 1], &c__1); - scopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & + dcopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & c__1); - emax = 0.f; + emax = 0.; i__1 = ki; for (k = 1; k <= i__1; ++k) { /* Computing MAX */ - r__3 = emax, r__4 = (r__1 = vr[k + (is - 1) * vr_dim1] - , dabs(r__1)) + (r__2 = vr[k + is * vr_dim1], - dabs(r__2)); - emax = dmax(r__3,r__4); + d__3 = emax, d__4 = (d__1 = vr[k + (is - 1) * vr_dim1] + , abs(d__1)) + (d__2 = vr[k + is * vr_dim1], + abs(d__2)); + emax = max(d__3,d__4); /* L100: */ } - remax = 1.f / emax; - sscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); - sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + remax = 1. / emax; + dscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); i__1 = *n; for (k = ki + 1; k <= i__1; ++k) { - vr[k + (is - 1) * vr_dim1] = 0.f; - vr[k + is * vr_dim1] = 0.f; + vr[k + (is - 1) * vr_dim1] = 0.; + vr[k + is * vr_dim1] = 0.; /* L110: */ } @@ -99935,33 +34694,33 @@ L90: if (ki > 2) { i__1 = ki - 2; - sgemv_("N", n, &i__1, &c_b1011, &vr[vr_offset], ldvr, - &work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[ - (ki - 1) * vr_dim1 + 1], &c__1); + dgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[( + ki - 1) * vr_dim1 + 1], &c__1); i__1 = ki - 2; - sgemv_("N", n, &i__1, &c_b1011, &vr[vr_offset], ldvr, - &work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * - vr_dim1 + 1], &c__1); + dgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & + work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * + vr_dim1 + 1], &c__1); } else { - sscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 + dscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 + 1], &c__1); - sscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & + dscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & c__1); } - emax = 0.f; + emax = 
0.; i__1 = *n; for (k = 1; k <= i__1; ++k) { /* Computing MAX */ - r__3 = emax, r__4 = (r__1 = vr[k + (ki - 1) * vr_dim1] - , dabs(r__1)) + (r__2 = vr[k + ki * vr_dim1], - dabs(r__2)); - emax = dmax(r__3,r__4); + d__3 = emax, d__4 = (d__1 = vr[k + (ki - 1) * vr_dim1] + , abs(d__1)) + (d__2 = vr[k + ki * vr_dim1], + abs(d__2)); + emax = max(d__3,d__4); /* L120: */ } - remax = 1.f / emax; - sscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); - sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + remax = 1. / emax; + dscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); } } @@ -99995,7 +34754,7 @@ L130: if (ki == *n) { goto L150; } - if (t[ki + 1 + ki * t_dim1] == 0.f) { + if (t[ki + 1 + ki * t_dim1] == 0.) { goto L150; } ip = 1; @@ -100010,20 +34769,20 @@ L150: /* Compute the KI-th eigenvalue (WR,WI). */ wr = t[ki + ki * t_dim1]; - wi = 0.f; + wi = 0.; if (ip != 0) { - wi = sqrt((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1))) * - sqrt((r__2 = t[ki + 1 + ki * t_dim1], dabs(r__2))); + wi = sqrt((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))); } /* Computing MAX */ - r__1 = ulp * (dabs(wr) + dabs(wi)); - smin = dmax(r__1,smlnum); + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1,smlnum); if (ip == 0) { /* Real left eigenvector. */ - work[ki + *n] = 1.f; + work[ki + *n] = 1.; /* Form right-hand side */ @@ -100038,7 +34797,7 @@ L150: (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK */ - vmax = 1.f; + vmax = 1.; vcrit = bignum; jnxt = ki + 1; @@ -100051,7 +34810,7 @@ L150: j2 = j; jnxt = j + 1; if (j < *n) { - if (t[j + 1 + j * t_dim1] != 0.f) { + if (t[j + 1 + j * t_dim1] != 0.) { j2 = j + 1; jnxt = j + 2; } @@ -100067,34 +34826,34 @@ L150: */ if (work[j] > vcrit) { - rec = 1.f / vmax; + rec = 1. 
/ vmax; i__3 = *n - ki + 1; - sscal_(&i__3, &rec, &work[ki + *n], &c__1); - vmax = 1.f; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.; vcrit = bignum; } i__3 = j - ki - 1; - work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1], + work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], &c__1, &work[ki + 1 + *n], &c__1); /* Solve (T(J,J)-WR)'*X = WORK */ - slaln2_(&c_false, &c__1, &c__1, &smin, &c_b1011, &t[j - + j * t_dim1], ldt, &c_b1011, &c_b1011, &work[ - j + *n], n, &wr, &c_b320, x, &c__2, &scale, & - xnorm, &ierr); + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); /* Scale if necessary */ - if (scale != 1.f) { + if (scale != 1.) { i__3 = *n - ki + 1; - sscal_(&i__3, &scale, &work[ki + *n], &c__1); + dscal_(&i__3, &scale, &work[ki + *n], &c__1); } work[j + *n] = x[0]; /* Computing MAX */ - r__2 = (r__1 = work[j + *n], dabs(r__1)); - vmax = dmax(r__2,vmax); + d__2 = (d__1 = work[j + *n], abs(d__1)); + vmax = max(d__2,vmax); vcrit = bignum / vmax; } else { @@ -100107,22 +34866,22 @@ L150: Computing MAX */ - r__1 = work[j], r__2 = work[j + 1]; - beta = dmax(r__1,r__2); + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1,d__2); if (beta > vcrit) { - rec = 1.f / vmax; + rec = 1. 
/ vmax; i__3 = *n - ki + 1; - sscal_(&i__3, &rec, &work[ki + *n], &c__1); - vmax = 1.f; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.; vcrit = bignum; } i__3 = j - ki - 1; - work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1], + work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], &c__1, &work[ki + 1 + *n], &c__1); i__3 = j - ki - 1; - work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 1 + (j + 1) * + work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 1 + (j + 1) * t_dim1], &c__1, &work[ki + 1 + *n], &c__1); /* @@ -100131,25 +34890,25 @@ L150: [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) */ - slaln2_(&c_true, &c__2, &c__1, &smin, &c_b1011, &t[j - + j * t_dim1], ldt, &c_b1011, &c_b1011, &work[ - j + *n], n, &wr, &c_b320, x, &c__2, &scale, & - xnorm, &ierr); + dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, + &ierr); /* Scale if necessary */ - if (scale != 1.f) { + if (scale != 1.) { i__3 = *n - ki + 1; - sscal_(&i__3, &scale, &work[ki + *n], &c__1); + dscal_(&i__3, &scale, &work[ki + *n], &c__1); } work[j + *n] = x[0]; work[j + 1 + *n] = x[1]; /* Computing MAX */ - r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = ( - r__2 = work[j + 1 + *n], dabs(r__2)), r__3 = - max(r__3,r__4); - vmax = dmax(r__3,vmax); + d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 + = work[j + 1 + *n], abs(d__2)), d__3 = max( + d__3,d__4); + vmax = max(d__3,vmax); vcrit = bignum / vmax; } @@ -100161,19 +34920,19 @@ L170: if (! over) { i__2 = *n - ki + 1; - scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * vl_dim1], &c__1); i__2 = *n - ki + 1; - ii = isamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - + ii = idamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; - remax = 1.f / (r__1 = vl[ii + is * vl_dim1], dabs(r__1)); + remax = 1. 
/ (d__1 = vl[ii + is * vl_dim1], abs(d__1)); i__2 = *n - ki + 1; - sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); i__2 = ki - 1; for (k = 1; k <= i__2; ++k) { - vl[k + is * vl_dim1] = 0.f; + vl[k + is * vl_dim1] = 0.; /* L180: */ } @@ -100181,14 +34940,14 @@ L170: if (ki < *n) { i__2 = *n - ki; - sgemv_("N", n, &i__2, &c_b1011, &vl[(ki + 1) * - vl_dim1 + 1], ldvl, &work[ki + 1 + *n], &c__1, - &work[ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + dgemv_("N", n, &i__2, &c_b15, &vl[(ki + 1) * vl_dim1 + + 1], ldvl, &work[ki + 1 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); } - ii = isamax_(n, &vl[ki * vl_dim1 + 1], &c__1); - remax = 1.f / (r__1 = vl[ii + ki * vl_dim1], dabs(r__1)); - sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + ii = idamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + remax = 1. / (d__1 = vl[ii + ki * vl_dim1], abs(d__1)); + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); } @@ -100202,16 +34961,16 @@ L170: ((T(KI+1,KI) T(KI+1,KI+1)) ) */ - if ((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1)) >= (r__2 = - t[ki + 1 + ki * t_dim1], dabs(r__2))) { + if ((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1)) >= (d__2 = + t[ki + 1 + ki * t_dim1], abs(d__2))) { work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1]; - work[ki + 1 + n2] = 1.f; + work[ki + 1 + n2] = 1.; } else { - work[ki + *n] = 1.f; + work[ki + *n] = 1.; work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1]; } - work[ki + 1 + *n] = 0.f; - work[ki + n2] = 0.f; + work[ki + 1 + *n] = 0.; + work[ki + n2] = 0.; /* Form right-hand side */ @@ -100228,7 +34987,7 @@ L170: ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 */ - vmax = 1.f; + vmax = 1.; vcrit = bignum; jnxt = ki + 2; @@ -100241,7 +35000,7 @@ L170: j2 = j; jnxt = j + 1; if (j < *n) { - if (t[j + 1 + j * t_dim1] != 0.f) { + if (t[j + 1 + j * t_dim1] != 0.) { j2 = j + 1; jnxt = j + 2; } @@ -100257,45 +35016,45 @@ L170: */ if (work[j] > vcrit) { - rec = 1.f / vmax; + rec = 1. 
/ vmax; i__3 = *n - ki + 1; - sscal_(&i__3, &rec, &work[ki + *n], &c__1); + dscal_(&i__3, &rec, &work[ki + *n], &c__1); i__3 = *n - ki + 1; - sscal_(&i__3, &rec, &work[ki + n2], &c__1); - vmax = 1.f; + dscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.; vcrit = bignum; } i__3 = j - ki - 2; - work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], &c__1, &work[ki + 2 + *n], &c__1); i__3 = j - ki - 2; - work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], &c__1, &work[ki + 2 + n2], &c__1); /* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */ - r__1 = -wi; - slaln2_(&c_false, &c__1, &c__2, &smin, &c_b1011, &t[j - + j * t_dim1], ldt, &c_b1011, &c_b1011, &work[ - j + *n], n, &wr, &r__1, x, &c__2, &scale, & - xnorm, &ierr); + d__1 = -wi; + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &d__1, x, &c__2, &scale, &xnorm, & + ierr); /* Scale if necessary */ - if (scale != 1.f) { + if (scale != 1.) { i__3 = *n - ki + 1; - sscal_(&i__3, &scale, &work[ki + *n], &c__1); + dscal_(&i__3, &scale, &work[ki + *n], &c__1); i__3 = *n - ki + 1; - sscal_(&i__3, &scale, &work[ki + n2], &c__1); + dscal_(&i__3, &scale, &work[ki + n2], &c__1); } work[j + *n] = x[0]; work[j + n2] = x[2]; /* Computing MAX */ - r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = ( - r__2 = work[j + n2], dabs(r__2)), r__3 = max( - r__3,r__4); - vmax = dmax(r__3,vmax); + d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 + = work[j + n2], abs(d__2)), d__3 = max(d__3, + d__4); + vmax = max(d__3,vmax); vcrit = bignum / vmax; } else { @@ -100308,32 +35067,32 @@ L170: Computing MAX */ - r__1 = work[j], r__2 = work[j + 1]; - beta = dmax(r__1,r__2); + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1,d__2); if (beta > vcrit) { - rec = 1.f / vmax; + rec = 1. 
/ vmax; i__3 = *n - ki + 1; - sscal_(&i__3, &rec, &work[ki + *n], &c__1); + dscal_(&i__3, &rec, &work[ki + *n], &c__1); i__3 = *n - ki + 1; - sscal_(&i__3, &rec, &work[ki + n2], &c__1); - vmax = 1.f; + dscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.; vcrit = bignum; } i__3 = j - ki - 2; - work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], &c__1, &work[ki + 2 + *n], &c__1); i__3 = j - ki - 2; - work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], &c__1, &work[ki + 2 + n2], &c__1); i__3 = j - ki - 2; - work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 2 + (j + 1) * + work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * t_dim1], &c__1, &work[ki + 2 + *n], &c__1); i__3 = j - ki - 2; - work[j + 1 + n2] -= sdot_(&i__3, &t[ki + 2 + (j + 1) * + work[j + 1 + n2] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * t_dim1], &c__1, &work[ki + 2 + n2], &c__1); /* @@ -100342,30 +35101,29 @@ L170: ([T(j+1,j) T(j+1,j+1)] ) */ - r__1 = -wi; - slaln2_(&c_true, &c__2, &c__2, &smin, &c_b1011, &t[j - + j * t_dim1], ldt, &c_b1011, &c_b1011, &work[ - j + *n], n, &wr, &r__1, x, &c__2, &scale, & - xnorm, &ierr); + d__1 = -wi; + dlaln2_(&c_true, &c__2, &c__2, &smin, &c_b15, &t[j + + j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * + n], n, &wr, &d__1, x, &c__2, &scale, &xnorm, & + ierr); /* Scale if necessary */ - if (scale != 1.f) { + if (scale != 1.) 
{ i__3 = *n - ki + 1; - sscal_(&i__3, &scale, &work[ki + *n], &c__1); + dscal_(&i__3, &scale, &work[ki + *n], &c__1); i__3 = *n - ki + 1; - sscal_(&i__3, &scale, &work[ki + n2], &c__1); + dscal_(&i__3, &scale, &work[ki + n2], &c__1); } work[j + *n] = x[0]; work[j + n2] = x[2]; work[j + 1 + *n] = x[1]; work[j + 1 + n2] = x[3]; /* Computing MAX */ - r__1 = dabs(x[0]), r__2 = dabs(x[2]), r__1 = max(r__1, - r__2), r__2 = dabs(x[1]), r__1 = max(r__1, - r__2), r__2 = dabs(x[3]), r__1 = max(r__1, - r__2); - vmax = dmax(r__1,vmax); + d__1 = abs(x[0]), d__2 = abs(x[2]), d__1 = max(d__1, + d__2), d__2 = abs(x[1]), d__1 = max(d__1,d__2) + , d__2 = abs(x[3]), d__1 = max(d__1,d__2); + vmax = max(d__1,vmax); vcrit = bignum / vmax; } @@ -100380,66 +35138,66 @@ L200: */ if (! over) { i__2 = *n - ki + 1; - scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * vl_dim1], &c__1); i__2 = *n - ki + 1; - scopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * + dcopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * vl_dim1], &c__1); - emax = 0.f; + emax = 0.; i__2 = *n; for (k = ki; k <= i__2; ++k) { /* Computing MAX */ - r__3 = emax, r__4 = (r__1 = vl[k + is * vl_dim1], - dabs(r__1)) + (r__2 = vl[k + (is + 1) * - vl_dim1], dabs(r__2)); - emax = dmax(r__3,r__4); + d__3 = emax, d__4 = (d__1 = vl[k + is * vl_dim1], abs( + d__1)) + (d__2 = vl[k + (is + 1) * vl_dim1], + abs(d__2)); + emax = max(d__3,d__4); /* L220: */ } - remax = 1.f / emax; + remax = 1. 
/ emax; i__2 = *n - ki + 1; - sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); i__2 = *n - ki + 1; - sscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) + dscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) ; i__2 = ki - 1; for (k = 1; k <= i__2; ++k) { - vl[k + is * vl_dim1] = 0.f; - vl[k + (is + 1) * vl_dim1] = 0.f; + vl[k + is * vl_dim1] = 0.; + vl[k + (is + 1) * vl_dim1] = 0.; /* L230: */ } } else { if (ki < *n - 1) { i__2 = *n - ki - 1; - sgemv_("N", n, &i__2, &c_b1011, &vl[(ki + 2) * - vl_dim1 + 1], ldvl, &work[ki + 2 + *n], &c__1, - &work[ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + dgemv_("N", n, &i__2, &c_b15, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); i__2 = *n - ki - 1; - sgemv_("N", n, &i__2, &c_b1011, &vl[(ki + 2) * - vl_dim1 + 1], ldvl, &work[ki + 2 + n2], &c__1, - &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + - 1], &c__1); + dgemv_("N", n, &i__2, &c_b15, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + n2], &c__1, &work[ + ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + 1], & + c__1); } else { - sscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & + dscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & c__1); - sscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + dscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + 1], &c__1); } - emax = 0.f; + emax = 0.; i__2 = *n; for (k = 1; k <= i__2; ++k) { /* Computing MAX */ - r__3 = emax, r__4 = (r__1 = vl[k + ki * vl_dim1], - dabs(r__1)) + (r__2 = vl[k + (ki + 1) * - vl_dim1], dabs(r__2)); - emax = dmax(r__3,r__4); + d__3 = emax, d__4 = (d__1 = vl[k + ki * vl_dim1], abs( + d__1)) + (d__2 = vl[k + (ki + 1) * vl_dim1], + abs(d__2)); + emax = max(d__3,d__4); /* L240: */ } - remax = 1.f / emax; - sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - sscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); + remax = 1. 
/ emax; + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + dscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); } @@ -100462,371 +35220,770 @@ L250: } - return 0; + return 0; + +/* End of DTREVC */ + +} /* dtrevc_ */ + +integer ieeeck_(integer *ispec, real *zero, real *one) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + static real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, + newzro; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1998 + + + Purpose + ======= + + IEEECK is called from the ILAENV to verify that Infinity and + possibly NaN arithmetic is safe (i.e. will not trap). + + Arguments + ========= + + ISPEC (input) INTEGER + Specifies whether to test just for inifinity arithmetic + or whether to test for infinity and NaN arithmetic. + = 0: Verify infinity arithmetic only. + = 1: Verify infinity and NaN arithmetic. + + ZERO (input) REAL + Must contain the value 0.0 + This is passed to prevent the compiler from optimizing + away this code. + + ONE (input) REAL + Must contain the value 1.0 + This is passed to prevent the compiler from optimizing + away this code. 
+ + RETURN VALUE: INTEGER + = 0: Arithmetic failed to produce the correct answers + = 1: Arithmetic produced the correct answers +*/ + + ret_val = 1; + + posinf = *one / *zero; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf = -(*one) / *zero; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + negzro = *one / (neginf + *one); + if (negzro != *zero) { + ret_val = 0; + return ret_val; + } + + neginf = *one / negzro; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + newzro = negzro + *zero; + if (newzro != *zero) { + ret_val = 0; + return ret_val; + } + + posinf = *one / newzro; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf *= posinf; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + posinf *= posinf; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + +/* Return if we were only asked to check infinity arithmetic */ + + if (*ispec == 0) { + return ret_val; + } + + nan1 = posinf + neginf; + + nan2 = posinf / neginf; + + nan3 = posinf / posinf; + + nan4 = posinf * *zero; + + nan5 = neginf * negzro; + + nan6 = nan5 * 0.f; + + if (nan1 == nan1) { + ret_val = 0; + return ret_val; + } + + if (nan2 == nan2) { + ret_val = 0; + return ret_val; + } + + if (nan3 == nan3) { + ret_val = 0; + return ret_val; + } + + if (nan4 == nan4) { + ret_val = 0; + return ret_val; + } + + if (nan5 == nan5) { + ret_val = 0; + return ret_val; + } + + if (nan6 == nan6) { + ret_val = 0; + return ret_val; + } + + return ret_val; +} /* ieeeck_ */ + +integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen + opts_len) +{ + /* System generated locals */ + integer ret_val; + + /* Builtin functions */ + /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); + integer s_cmp(char *, char *, ftnlen, ftnlen); + + /* Local variables */ + static integer i__; + static char c1[1], c2[2], c3[3], c4[2]; + static integer 
ic, nb, iz, nx; + static logical cname, sname; + static integer nbmin; + extern integer ieeeck_(integer *, real *, real *); + static char subnam[6]; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ILAENV is called from the LAPACK routines to choose problem-dependent + parameters for the local environment. See ISPEC for a description of + the parameters. + + This version provides a set of parameters which should give good, + but not optimal, performance on many of the currently available + computers. Users are encouraged to modify this subroutine to set + the tuning parameters for their particular machine using the option + and problem size information in the arguments. + + This routine will not function correctly if it is converted to all + lower case. Converting it to all upper case is allowed. + + Arguments + ========= + + ISPEC (input) INTEGER + Specifies the parameter to be returned as the value of + ILAENV. + = 1: the optimal blocksize; if this value is 1, an unblocked + algorithm will give the best performance. + = 2: the minimum block size for which the block routine + should be used; if the usable block size is less than + this value, an unblocked routine should be used. + = 3: the crossover point (in a block routine, for N less + than this value, an unblocked routine should be used) + = 4: the number of shifts, used in the nonsymmetric + eigenvalue routines + = 5: the minimum column dimension for blocking to be used; + rectangular blocks must have dimension at least k by m, + where k is given by ILAENV(2,...) and m by ILAENV(5,...) + = 6: the crossover point for the SVD (when reducing an m by n + matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds + this value, a QR factorization is used first to reduce + the matrix to a triangular form.) 
+ = 7: the number of processors + = 8: the crossover point for the multishift QR and QZ methods + for nonsymmetric eigenvalue problems. + = 9: maximum size of the subproblems at the bottom of the + computation tree in the divide-and-conquer algorithm + (used by xGELSD and xGESDD) + =10: ieee NaN arithmetic can be trusted not to trap + =11: infinity arithmetic can be trusted not to trap + + NAME (input) CHARACTER*(*) + The name of the calling subroutine, in either upper case or + lower case. + + OPTS (input) CHARACTER*(*) + The character options to the subroutine NAME, concatenated + into a single character string. For example, UPLO = 'U', + TRANS = 'T', and DIAG = 'N' for a triangular routine would + be specified as OPTS = 'UTN'. + + N1 (input) INTEGER + N2 (input) INTEGER + N3 (input) INTEGER + N4 (input) INTEGER + Problem dimensions for the subroutine NAME; these may not all + be required. + + (ILAENV) (output) INTEGER + >= 0: the value of the parameter specified by ISPEC + < 0: if ILAENV = -k, the k-th argument had an illegal value. + + Further Details + =============== + + The following conventions have been used when calling ILAENV from the + LAPACK routines: + 1) OPTS is a concatenation of all of the character options to + subroutine NAME, in the same order that they appear in the + argument list for NAME, even if they are not used in determining + the value of the parameter specified by ISPEC. + 2) The problem dimensions N1, N2, N3, N4 are specified in the order + that they appear in the argument list for NAME. N1 is used + first, N2 second, and so on, and unused problem dimensions are + passed a value of -1. + 3) The parameter value returned by ILAENV is checked for validity in + the calling subroutine. 
For example, ILAENV is used to retrieve + the optimal blocksize for STRTRI as follows: + + NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 ) NB = MAX( 1, N ) -/* End of STREVC */ + ===================================================================== +*/ -} /* strevc_ */ -/* Subroutine */ int strti2_(char *uplo, char *diag, integer *n, real *a, - integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; + switch (*ispec) { + case 1: goto L100; + case 2: goto L100; + case 3: goto L100; + case 4: goto L400; + case 5: goto L500; + case 6: goto L600; + case 7: goto L700; + case 8: goto L800; + case 9: goto L900; + case 10: goto L1000; + case 11: goto L1100; + } - /* Local variables */ - static integer j; - static real ajj; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static logical upper; - extern /* Subroutine */ int strmv_(char *, char *, char *, integer *, - real *, integer *, real *, integer *), - xerbla_(char *, integer *); - static logical nounit; +/* Invalid value for ISPEC */ + ret_val = -1; + return ret_val; -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 +L100: +/* Convert NAME to upper case if the first character is lower case. */ - Purpose - ======= + ret_val = 1; + s_copy(subnam, name__, (ftnlen)6, name_len); + ic = *(unsigned char *)subnam; + iz = 'Z'; + if (iz == 90 || iz == 122) { - STRTI2 computes the inverse of a real upper or lower triangular - matrix. +/* ASCII character set */ - This is the Level 2 BLAS version of the algorithm. 
+ if (ic >= 97 && ic <= 122) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 97 && ic <= 122) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L10: */ + } + } - Arguments - ========= + } else if (iz == 233 || iz == 169) { - UPLO (input) CHARACTER*1 - Specifies whether the matrix A is upper or lower triangular. - = 'U': Upper triangular - = 'L': Lower triangular +/* EBCDIC character set */ - DIAG (input) CHARACTER*1 - Specifies whether or not the matrix A is unit triangular. - = 'N': Non-unit triangular - = 'U': Unit triangular + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && + ic <= 169) { + *(unsigned char *)subnam = (char) (ic + 64); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= + 162 && ic <= 169) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); + } +/* L20: */ + } + } - N (input) INTEGER - The order of the matrix A. N >= 0. + } else if (iz == 218 || iz == 250) { - A (input/output) REAL array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading n by n upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. +/* Prime machines: ASCII+128 */ - On exit, the (triangular) inverse of the original matrix, in - the same storage format. 
+ if (ic >= 225 && ic <= 250) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 225 && ic <= 250) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L30: */ + } + } + } - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). + *(unsigned char *)c1 = *(unsigned char *)subnam; + sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D'; + cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; + if (! (cname || sname)) { + return ret_val; + } + s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2); + s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3); + s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2); - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value + switch (*ispec) { + case 1: goto L110; + case 2: goto L200; + case 3: goto L300; + } - ===================================================================== +L110: +/* + ISPEC = 1: block size - Test the input parameters. + In these examples, separate code is provided for setting NB for + real and complex. We assume that NB will take the same value in + single or double precision. */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; + nb = 1; - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! 
lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("STRTI2", &i__1); - return 0; + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, + "RQF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) + 3, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) + == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "PO", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (sname && s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } else if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, 
"QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (s_cmp(c2, "GB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "PB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if 
(s_cmp(c2, "TR", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "LA", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "UUM", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (sname && s_cmp(c2, "ST", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "EBZ", (ftnlen)3, (ftnlen)3) == 0) { + nb = 1; + } } + ret_val = nb; + return ret_val; - if (upper) { +L200: -/* Compute inverse of upper triangular matrix. */ +/* ISPEC = 2: minimum block size */ - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (nounit) { - a[j + j * a_dim1] = 1.f / a[j + j * a_dim1]; - ajj = -a[j + j * a_dim1]; + nbmin = 2; + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nbmin = 2; } else { - ajj = -1.f; + nbmin = 2; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; } - -/* Compute elements 1:j-1 of j-th column. */ - - i__2 = j - 1; - strmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & - a[j * a_dim1 + 1], &c__1); - i__2 = j - 1; - sscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); -/* L10: */ } - } else { - -/* Compute inverse of lower triangular matrix. 
*/ - - for (j = *n; j >= 1; --j) { - if (nounit) { - a[j + j * a_dim1] = 1.f / a[j + j * a_dim1]; - ajj = -a[j + j * a_dim1]; + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 8; } else { - ajj = -1.f; + nbmin = 8; } - if (j < *n) { - -/* Compute elements j+1:n of j-th column. */ - - i__1 = *n - j; - strmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + - 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); - i__1 = *n - j; - sscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, 
(ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; } -/* L20: */ } } + ret_val = nbmin; + return ret_val; - return 0; - -/* End of STRTI2 */ - -} /* strti2_ */ +L300: -/* Subroutine */ int strtri_(char *uplo, char *diag, integer *n, real *a, - integer *lda, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; - char ch__1[2]; +/* ISPEC = 3: crossover point */ - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + nx = 0; + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 
|| s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } + ret_val = nx; + return ret_val; - /* Local variables */ - static integer j, jb, nb, nn; - extern logical lsame_(char *, char *); - static logical upper; - extern /* Subroutine */ int strmm_(char *, char *, char *, char *, - integer *, integer *, real *, real *, integer *, real *, integer * - ), strsm_(char *, char *, char *, - char *, integer *, integer *, real *, real *, integer *, real *, - integer *), strti2_(char *, char * - , integer *, real *, integer *, integer *), - xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical nounit; +L400: +/* ISPEC = 4: number of shifts (used by xHSEQR) */ -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + ret_val = 6; + return ret_val; +L500: - Purpose - ======= +/* ISPEC = 5: minimum column dimension (not used) */ - STRTRI computes the inverse of a real upper or lower triangular - matrix A. + ret_val = 2; + return ret_val; - This is the Level 3 BLAS version of the algorithm. 
+L600: - Arguments - ========= +/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ - UPLO (input) CHARACTER*1 - = 'U': A is upper triangular; - = 'L': A is lower triangular. + ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); + return ret_val; - DIAG (input) CHARACTER*1 - = 'N': A is non-unit triangular; - = 'U': A is unit triangular. +L700: - N (input) INTEGER - The order of the matrix A. N >= 0. +/* ISPEC = 7: number of processors (not used) */ - A (input/output) REAL array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - On exit, the (triangular) inverse of the original matrix, in - the same storage format. + ret_val = 1; + return ret_val; - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). +L800: - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, A(i,i) is exactly zero. The triangular - matrix is singular and its inverse can not be computed. +/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ - ===================================================================== + ret_val = 50; + return ret_val; +L900: - Test the input parameters. 
+/* + ISPEC = 9: maximum size of the subproblems at the bottom of the + computation tree in the divide-and-conquer algorithm + (used by xGELSD and xGESDD) */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; + ret_val = 25; + return ret_val; - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("STRTRI", &i__1); - return 0; - } +L1000: -/* Quick return if possible */ +/* + ISPEC = 10: ieee NaN arithmetic can be trusted not to trap - if (*n == 0) { - return 0; + ILAENV = 0 +*/ + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__0, &c_b3825, &c_b3826); } + return ret_val; -/* Check for singularity if non-unit. */ - - if (nounit) { - i__1 = *n; - for (*info = 1; *info <= i__1; ++(*info)) { - if (a[*info + *info * a_dim1] == 0.f) { - return 0; - } -/* L10: */ - } - *info = 0; - } +L1100: /* - Determine the block size for this environment. + ISPEC = 11: infinity arithmetic can be trusted not to trap - Writing concatenation + ILAENV = 0 */ - i__2[0] = 1, a__1[0] = uplo; - i__2[1] = 1, a__1[1] = diag; - s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); - nb = ilaenv_(&c__1, "STRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)2); - if ((nb <= 1) || (nb >= *n)) { - -/* Use unblocked code */ - - strti2_(uplo, diag, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code */ - - if (upper) { - -/* Compute inverse of upper triangular matrix */ - - i__1 = *n; - i__3 = nb; - for (j = 1; i__3 < 0 ? 
j >= i__1 : j <= i__1; j += i__3) { -/* Computing MIN */ - i__4 = nb, i__5 = *n - j + 1; - jb = min(i__4,i__5); - -/* Compute rows 1:j-1 of current block column */ - - i__4 = j - 1; - strmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & - c_b1011, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); - i__4 = j - 1; - strsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & - c_b1290, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], - lda); - -/* Compute inverse of current diagonal block */ - - strti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L20: */ - } - } else { - -/* Compute inverse of lower triangular matrix */ - - nn = (*n - 1) / nb * nb + 1; - i__3 = -nb; - for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) { -/* Computing MIN */ - i__1 = nb, i__4 = *n - j + 1; - jb = min(i__1,i__4); - if (j + jb <= *n) { - -/* Compute rows j+jb:n of current block column */ - - i__1 = *n - j - jb + 1; - strmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, - &c_b1011, &a[j + jb + (j + jb) * a_dim1], lda, &a[ - j + jb + j * a_dim1], lda); - i__1 = *n - j - jb + 1; - strsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, - &c_b1290, &a[j + j * a_dim1], lda, &a[j + jb + j - * a_dim1], lda); - } - -/* Compute inverse of current diagonal block */ - - strti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L30: */ - } - } + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__1, &c_b3825, &c_b3826); } + return ret_val; - return 0; +/* End of ILAENV */ -/* End of STRTRI */ +} /* ilaenv_ */ -} /* strtri_ */ diff --git a/numpy/linalg/lapack_lite/zlapack_lite.c b/numpy/linalg/lapack_lite/zlapack_lite.c index 29b017c89..0df4bdcda 100644 --- a/numpy/linalg/lapack_lite/zlapack_lite.c +++ b/numpy/linalg/lapack_lite/zlapack_lite.c @@ -233,9 +233,9 @@ L20: *info = -2; } else if (*n < 0) { *info = -3; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { + } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -4; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) 
{ + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -5; } else if (*m < 0) { *info = -7; @@ -266,7 +266,7 @@ L20: /* Backward balance */ - if ((lsame_(job, "S")) || (lsame_(job, "B"))) { + if (lsame_(job, "S") || lsame_(job, "B")) { if (rightv) { i__1 = *ihi; @@ -296,7 +296,7 @@ L20: */ L30: - if ((lsame_(job, "P")) || (lsame_(job, "B"))) { + if (lsame_(job, "P") || lsame_(job, "B")) { if (rightv) { i__1 = *n; for (ii = 1; ii <= i__1; ++ii) { @@ -550,7 +550,7 @@ L50: goto L60; } i__2 = j + i__ * a_dim1; - if ((a[i__2].r != 0.) || (d_imag(&a[j + i__ * a_dim1]) != 0.)) { + if (a[i__2].r != 0. || d_imag(&a[j + i__ * a_dim1]) != 0.) { goto L70; } L60: @@ -581,7 +581,7 @@ L90: goto L100; } i__3 = i__ + j * a_dim1; - if ((a[i__3].r != 0.) || (d_imag(&a[i__ + j * a_dim1]) != 0.)) { + if (a[i__3].r != 0. || d_imag(&a[i__ + j * a_dim1]) != 0.) { goto L110; } L100: @@ -646,7 +646,7 @@ L150: /* Guard against zero C or R due to underflow. */ - if ((c__ == 0.) || (r__ == 0.)) { + if (c__ == 0. || r__ == 0.) 
{ goto L200; } g = r__ / 8.; @@ -657,8 +657,7 @@ L160: d__1 = max(f,c__); /* Computing MIN */ d__2 = min(r__,g); - if (((c__ >= g) || (max(d__1,ca) >= sfmax2)) || (min(d__2,ra) <= - sfmin2)) { + if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { goto L170; } f *= 8.; @@ -674,8 +673,7 @@ L170: L180: /* Computing MIN */ d__1 = min(f,c__), d__1 = min(d__1,g); - if (((g < r__) || (max(r__,ra) >= sfmax2)) || (min(d__1,ca) <= sfmin2) - ) { + if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { goto L190; } f /= 8.; @@ -1569,9 +1567,9 @@ L210: *info = -3; } else if (*lda < max(1,*n)) { *info = -5; - } else if ((*ldvl < 1) || (wantvl && *ldvl < *n)) { + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { *info = -8; - } else if ((*ldvr < 1) || (wantvr && *ldvr < *n)) { + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { *info = -10; } @@ -1589,12 +1587,12 @@ L210: */ minwrk = 1; - if (*info == 0 && ((*lwork >= 1) || (lquery))) { + if (*info == 0 && (*lwork >= 1 || lquery)) { maxwrk = *n + *n * ilaenv_(&c__1, "ZGEHRD", " ", n, &c__1, n, &c__0, ( ftnlen)6, (ftnlen)1); if (! wantvl && ! 
wantvr) { /* Computing MAX */ - i__1 = 1, i__2 = (*n) << (1); + i__1 = 1, i__2 = *n << 1; minwrk = max(i__1,i__2); /* Computing MAX */ i__1 = ilaenv_(&c__8, "ZHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) @@ -1609,12 +1607,12 @@ L210: i__1 = min(maxb,*n), i__2 = max(i__3,i__4); k = min(i__1,i__2); /* Computing MAX */ - i__1 = k * (k + 2), i__2 = (*n) << (1); + i__1 = k * (k + 2), i__2 = *n << 1; hswork = max(i__1,i__2); maxwrk = max(maxwrk,hswork); } else { /* Computing MAX */ - i__1 = 1, i__2 = (*n) << (1); + i__1 = 1, i__2 = *n << 1; minwrk = max(i__1,i__2); /* Computing MAX */ i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "ZUNGHR", @@ -1633,10 +1631,10 @@ L210: i__1 = min(maxb,*n), i__2 = max(i__3,i__4); k = min(i__1,i__2); /* Computing MAX */ - i__1 = k * (k + 2), i__2 = (*n) << (1); + i__1 = k * (k + 2), i__2 = *n << 1; hswork = max(i__1,i__2); /* Computing MAX */ - i__1 = max(maxwrk,hswork), i__2 = (*n) << (1); + i__1 = max(maxwrk,hswork), i__2 = *n << 1; maxwrk = max(i__1,i__2); } work[1].r = (doublereal) maxwrk, work[1].i = 0.; @@ -1799,7 +1797,7 @@ L210: goto L50; } - if ((wantvl) || (wantvr)) { + if (wantvl || wantvr) { /* Compute left and/or right eigenvectors @@ -2039,9 +2037,9 @@ L50: *info = 0; if (*n < 0) { *info = -1; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { + } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -2; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; @@ -2242,9 +2240,9 @@ L50: lquery = *lwork == -1; if (*n < 0) { *info = -1; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { + } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -2; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; @@ -2322,7 +2320,7 @@ L50: } ldwork = *n; - if ((nb < nbmin) || (nb >= nh)) { + if (nb < nbmin || nb >= nh) { /* Use unblocked code below 
*/ @@ -3034,24 +3032,23 @@ L50: Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + (mm + *n) * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*n << 1) + (mm + *n) * ilaenv_(&c__1, "ZGEBRD", " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1) ; maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *nrhs * ilaenv_(&c__1, - "ZUNMBR", "QLC", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen) - 3); + i__1 = maxwrk, i__2 = (*n << 1) + *nrhs * ilaenv_(&c__1, "ZUNMBR", + "QLC", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + (*n - 1) * ilaenv_(&c__1, - "ZUNMBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "ZUN" + "MBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * *nrhs; + i__1 = maxwrk, i__2 = (*n << 1) + *n * *nrhs; maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = ((*n) << (1)) + mm, i__2 = ((*n) << (1)) + *n * *nrhs; + i__1 = (*n << 1) + mm, i__2 = (*n << 1) + *n * *nrhs; minwrk = max(i__1,i__2); } if (*n > *m) { @@ -3065,17 +3062,17 @@ L50: maxwrk = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + ((*m) << (1)) - * ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, ( + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, ( ftnlen)6, (ftnlen)1); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + *nrhs * - ilaenv_(&c__1, "ZUNMBR", "QLC", m, nrhs, m, &c_n1, ( - ftnlen)6, (ftnlen)3); + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& + c__1, "ZUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + (*m - 
1) * + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * ilaenv_(&c__1, "ZUNMLQ", "LC", n, nrhs, m, &c_n1, ( ftnlen)6, (ftnlen)2); maxwrk = max(i__1,i__2); @@ -3085,34 +3082,33 @@ L50: maxwrk = max(i__1,i__2); } else { /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (1)); + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); maxwrk = max(i__1,i__2); } /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + *m * *nrhs; + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *m * *nrhs; maxwrk = max(i__1,i__2); } else { /* Path 2 - underdetermined. */ - maxwrk = ((*m) << (1)) + (*n + *m) * ilaenv_(&c__1, "ZGEBRD", + maxwrk = (*m << 1) + (*n + *m) * ilaenv_(&c__1, "ZGEBRD", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *nrhs * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*m << 1) + *nrhs * ilaenv_(&c__1, "ZUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, - "ZUNMBR", "PLN", n, nrhs, m, &c_n1, (ftnlen)6, ( - ftnlen)3); + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNMBR" + , "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * *nrhs; + i__1 = maxwrk, i__2 = (*m << 1) + *m * *nrhs; maxwrk = max(i__1,i__2); } /* Computing MAX */ - i__1 = ((*m) << (1)) + *n, i__2 = ((*m) << (1)) + *m * *nrhs; + i__1 = (*m << 1) + *n, i__2 = (*m << 1) + *m * *nrhs; minwrk = max(i__1,i__2); } minwrk = min(minwrk,maxwrk); @@ -3134,7 +3130,7 @@ L50: /* Quick return if possible. 
*/ - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { *rank = 0; return 0; } @@ -3290,10 +3286,9 @@ L50: } else /* if(complicated condition) */ { /* Computing MAX */ - i__1 = *m, i__2 = ((*m) << (1)) - 4, i__1 = max(i__1,i__2), i__1 = - max(i__1,*nrhs), i__2 = *n - *m * 3; - if (*n >= mnthr && *lwork >= ((*m) << (2)) + *m * *m + max(i__1,i__2)) - { + i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( + i__1,*nrhs), i__2 = *n - *m * 3; + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) { /* Path 2a - underdetermined, with many more columns than rows @@ -3305,10 +3300,10 @@ L50: Computing MAX Computing MAX */ - i__3 = *m, i__4 = ((*m) << (1)) - 4, i__3 = max(i__3,i__4), i__3 = - max(i__3,*nrhs), i__4 = *n - *m * 3; - i__1 = ((*m) << (2)) + *m * *lda + max(i__3,i__4), i__2 = *m * * - lda + *m + *m * *nrhs; + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + + *m + *m * *nrhs; if (*lwork >= max(i__1,i__2)) { ldwork = *lda; } @@ -4084,14 +4079,14 @@ L10: mnthr2 = (integer) (minmn * 5. / 3.); wntqa = lsame_(jobz, "A"); wntqs = lsame_(jobz, "S"); - wntqas = (wntqa) || (wntqs); + wntqas = wntqa || wntqs; wntqo = lsame_(jobz, "O"); wntqn = lsame_(jobz, "N"); minwrk = 1; maxwrk = 1; lquery = *lwork == -1; - if (! ((((wntqa) || (wntqs)) || (wntqo)) || (wntqn))) { + if (! 
(wntqa || wntqs || wntqo || wntqn)) { *info = -1; } else if (*m < 0) { *info = -2; @@ -4099,11 +4094,11 @@ L10: *info = -3; } else if (*lda < max(1,*m)) { *info = -5; - } else if (((*ldu < 1) || (wntqas && *ldu < *m)) || (wntqo && *m < *n && * - ldu < *m)) { + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { *info = -8; - } else if ((((*ldvt < 1) || (wntqa && *ldvt < *n)) || (wntqs && *ldvt < - minmn)) || (wntqo && *m >= *n && *ldvt < *n)) { + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { *info = -10; } @@ -4134,9 +4129,9 @@ L10: wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) * - ilaenv_(&c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); wrkbl = max(i__1,i__2); maxwrk = wrkbl; minwrk = *n * 3; @@ -4151,22 +4146,22 @@ L10: " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) * - ilaenv_(&c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( ftnlen)3); wrkbl = max(i__1,i__2); maxwrk = *m * *n + *n * *n + wrkbl; - minwrk = ((*n) << (1)) * *n + *n * 3; + minwrk = (*n << 1) * *n + *n * 
3; } else if (wntqs) { /* Path 3 (M much larger than N, JOBZ='S') */ @@ -4178,17 +4173,17 @@ L10: " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) * - ilaenv_(&c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( ftnlen)3); wrkbl = max(i__1,i__2); @@ -4205,38 +4200,38 @@ L10: " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) * - ilaenv_(&c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( ftnlen)3); wrkbl = max(i__1,i__2); maxwrk = *n * *n + wrkbl; - minwrk = *n * *n + ((*n) << (1)) + *m; + minwrk = *n * *n + (*n << 1) + *m; } } else if (*m >= mnthr2) { /* Path 5 (M much larger than N, but not as much as MNTHR1) */ - maxwrk = ((*n) << (1)) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + maxwrk = 
(*n << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = ((*n) << (1)) + *m; + minwrk = (*n << 1) + *m; if (wntqo) { /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) 1); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) 1); maxwrk = max(i__1,i__2); @@ -4244,23 +4239,23 @@ L10: minwrk += *n * *n; } else if (wntqs) { /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) 1); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) 1); maxwrk = max(i__1,i__2); } else if (wntqa) { /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) 1); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *m * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) 1); maxwrk = max(i__1,i__2); @@ -4269,17 +4264,17 @@ L10: /* Path 6 (M at least N, but not much larger) */ - maxwrk = ((*n) << (1)) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = ((*n) << (1)) + *m; + minwrk = (*n << 1) + *m; if (wntqo) { /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*n << 1) + *n * 
ilaenv_(&c__1, "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); @@ -4287,23 +4282,23 @@ L10: minwrk += *n * *n; } else if (wntqs) { /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); } else if (wntqa) { /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, "ZUNGBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*n) << (1)) + *m * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); @@ -4325,9 +4320,9 @@ L10: maxwrk = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + ((*m) << (1)) * - ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); + i__1 = maxwrk, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); maxwrk = max(i__1,i__2); minwrk = *m * 3; } else if (wntqo) { @@ -4341,22 +4336,22 @@ L10: " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + ((*m) << (1)) * - ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); + i__1 = 
wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( ftnlen)3); wrkbl = max(i__1,i__2); maxwrk = *m * *n + *m * *m + wrkbl; - minwrk = ((*m) << (1)) * *m + *m * 3; + minwrk = (*m << 1) * *m + *m * 3; } else if (wntqs) { /* Path 3t (N much larger than M, JOBZ='S') */ @@ -4368,17 +4363,17 @@ L10: " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + ((*m) << (1)) * - ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( ftnlen)3); wrkbl = max(i__1,i__2); @@ -4395,38 +4390,38 @@ L10: " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + ((*m) << (1)) * - ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, - (ftnlen)6, (ftnlen)1); + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); wrkbl = max(i__1,i__2); /* Computing MAX */ - 
i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( ftnlen)3); wrkbl = max(i__1,i__2); /* Computing MAX */ - i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( ftnlen)3); wrkbl = max(i__1,i__2); maxwrk = *m * *m + wrkbl; - minwrk = *m * *m + ((*m) << (1)) + *n; + minwrk = *m * *m + (*m << 1) + *n; } } else if (*n >= mnthr2) { /* Path 5t (N much larger than M, but not as much as MNTHR1) */ - maxwrk = ((*m) << (1)) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = ((*m) << (1)) + *n; + minwrk = (*m << 1) + *n; if (wntqo) { /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) 1); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) 1); maxwrk = max(i__1,i__2); @@ -4434,23 +4429,23 @@ L10: minwrk += *m * *m; } else if (wntqs) { /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) 1); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) 1); maxwrk = max(i__1,i__2); } else if (wntqa) { /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *n * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, "ZUNGBR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen) 1); maxwrk = 
max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) 1); maxwrk = max(i__1,i__2); @@ -4459,17 +4454,17 @@ L10: /* Path 6t (N greater than M, but not much larger) */ - maxwrk = ((*m) << (1)) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", + maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - minwrk = ((*m) << (1)) + *n; + minwrk = (*m << 1) + *n; if (wntqo) { /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNMBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNMBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); @@ -4477,23 +4472,23 @@ L10: minwrk += *m * *m; } else if (wntqs) { /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNGBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); } else if (wntqa) { /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *n * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, "ZUNGBR", "PRC", n, n, m, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1, + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( ftnlen)3); maxwrk = max(i__1,i__2); @@ -4517,7 +4512,7 @@ L10: /* Quick return if 
possible */ - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { if (*lwork >= 1) { work[1].r = 1., work[1].i = 0.; } @@ -5462,8 +5457,8 @@ L10: i__2 = *m - 1; i__1 = *m - 1; - zlaset_("U", &i__2, &i__1, &c_b59, &c_b59, &a[((a_dim1) << (1) - ) + 1], lda); + zlaset_("U", &i__2, &i__1, &c_b59, &c_b59, &a[(a_dim1 << 1) + + 1], lda); ie = 1; itauq = 1; itaup = itauq + *m; @@ -5773,8 +5768,8 @@ L10: i__1 = *m - 1; i__2 = *m - 1; - zlaset_("U", &i__1, &i__2, &c_b59, &c_b59, &a[((a_dim1) << (1) - ) + 1], lda); + zlaset_("U", &i__1, &i__2, &c_b59, &c_b59, &a[(a_dim1 << 1) + + 1], lda); ie = 1; itauq = itau; itaup = itauq + *m; @@ -6562,7 +6557,7 @@ L10: /* Quick return if possible */ - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { return 0; } @@ -6575,7 +6570,7 @@ L10: jp = j - 1 + izamax_(&i__2, &a[j + j * a_dim1], &c__1); ipiv[j] = jp; i__2 = jp + j * a_dim1; - if ((a[i__2].r != 0.) || (a[i__2].i != 0.)) { + if (a[i__2].r != 0. || a[i__2].i != 0.) { /* Apply the interchange to columns 1:N. */ @@ -6717,7 +6712,7 @@ L10: /* Quick return if possible */ - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { return 0; } @@ -6725,7 +6720,7 @@ L10: nb = ilaenv_(&c__1, "ZGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) 1); - if ((nb <= 1) || (nb >= min(*m,*n))) { + if (nb <= 1 || nb >= min(*m,*n)) { /* Use unblocked code. 
*/ @@ -6914,7 +6909,7 @@ L10: /* Quick return if possible */ - if ((*n == 0) || (*nrhs == 0)) { + if (*n == 0 || *nrhs == 0) { return 0; } @@ -7150,7 +7145,7 @@ L10: /* Function Body */ wantz = lsame_(jobz, "V"); lower = lsame_(uplo, "L"); - lquery = ((*lwork == -1) || (*lrwork == -1)) || (*liwork == -1); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; *info = 0; if (*n <= 1) { @@ -7162,10 +7157,10 @@ L10: liopt = liwmin; } else { if (wantz) { - lwmin = ((*n) << (1)) + *n * *n; + lwmin = (*n << 1) + *n * *n; /* Computing 2nd power */ i__1 = *n; - lrwmin = *n * 5 + 1 + ((i__1 * i__1) << (1)); + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); liwmin = *n * 5 + 3; } else { lwmin = *n + 1; @@ -7176,9 +7171,9 @@ L10: lropt = lrwmin; liopt = liwmin; } - if (! ((wantz) || (lsame_(jobz, "N")))) { + if (! (wantz || lsame_(jobz, "N"))) { *info = -1; - } else if (! ((lower) || (lsame_(uplo, "U")))) { + } else if (! (lower || lsame_(uplo, "U"))) { *info = -2; } else if (*n < 0) { *info = -3; @@ -7505,7 +7500,7 @@ L10: i__1 = i__; e[i__1] = alpha.r; - if ((taui.r != 0.) || (taui.i != 0.)) { + if (taui.r != 0. || taui.i != 0.) { /* Apply H(i) from both sides to A(1:i,1:i) */ @@ -7584,7 +7579,7 @@ L10: i__2 = i__; e[i__2] = alpha.r; - if ((taui.r != 0.) || (taui.i != 0.)) { + if (taui.r != 0. || taui.i != 0.) 
{ /* Apply H(i) from both sides to A(i+1:n,i+1:n) */ @@ -8188,7 +8183,7 @@ L10: /* Function Body */ wantt = lsame_(job, "S"); initz = lsame_(compz, "I"); - wantz = (initz) || (lsame_(compz, "V")); + wantz = initz || lsame_(compz, "V"); *info = 0; i__1 = max(1,*n); @@ -8200,13 +8195,13 @@ L10: *info = -2; } else if (*n < 0) { *info = -3; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { + } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -4; - } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) { + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -5; } else if (*ldh < max(1,*n)) { *info = -7; - } else if ((*ldz < 1) || (wantz && *ldz < max(1,*n))) { + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { *info = -10; } else if (*lwork < max(1,*n) && ! lquery) { *info = -12; @@ -8336,7 +8331,7 @@ L10: s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); maxb = ilaenv_(&c__8, "ZHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( ftnlen)2); - if (((ns <= 1) || (ns > nh)) || (maxb >= nh)) { + if (ns <= 1 || ns > nh || maxb >= nh) { /* Use the standard double-shift algorithm */ @@ -8439,7 +8434,7 @@ L80: i2 = i__; } - if ((its == 20) || (its == 30)) { + if (its == 20 || its == 30) { /* Exceptional shifts. 
*/ @@ -8828,7 +8823,7 @@ L180: y -= y_offset; /* Function Body */ - if ((*m <= 0) || (*n <= 0)) { + if (*m <= 0 || *n <= 0) { return 0; } @@ -9516,7 +9511,7 @@ L180: --rwork; /* Function Body */ - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { return 0; } @@ -9806,7 +9801,7 @@ L10: if (iwork[subpbs] > smlsiz) { for (j = subpbs; j >= 1; --j) { iwork[j * 2] = (iwork[j] + 1) / 2; - iwork[((j) << (1)) - 1] = iwork[j] / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; /* L20: */ } ++tlvls; @@ -9834,7 +9829,7 @@ L10: /* L40: */ } - indxq = ((*n) << (2)) + 3; + indxq = (*n << 2) + 3; /* Set up workspaces for eigenvalues only/accumulate new vectors @@ -9856,7 +9851,7 @@ L10: igivcl = igivpt + *n * lgn; igivnm = 1; - iq = igivnm + ((*n) << (1)) * lgn; + iq = igivnm + (*n << 1) * lgn; /* Computing 2nd power */ i__1 = *n; iwrem = iq + i__1 * i__1 + 1; @@ -10044,7 +10039,7 @@ L80: The first stage consists of deflating the size of the problem when there are multiple eigenvalues or if there is a zero in - the Z vector. For each such occurrence the dimension of the + the Z vector. For each such occurence the dimension of the secular equation problem is reduced by one. This stage is performed by the routine DLAED2. 
@@ -10184,7 +10179,7 @@ L80: */ if (*n < 0) { *info = -1; - } else if ((min(1,*n) > *cutpnt) || (*n < *cutpnt)) { + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { *info = -2; } else if (*qsiz < *n) { *info = -3; @@ -10253,8 +10248,7 @@ L80: zlaed8_(&k, n, qsiz, &q[q_offset], ldq, &d__[1], rho, cutpnt, &rwork[iz], &rwork[idlmda], &work[1], qsiz, &rwork[iw], &iwork[indxp], &iwork[ indx], &indxq[1], &perm[prmptr[curr]], &givptr[curr + 1], &givcol[ - ((givptr[curr]) << (1)) + 1], &givnum[((givptr[curr]) << (1)) + 1] - , info); + (givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], info); prmptr[curr + 1] = prmptr[curr] + *n; givptr[curr + 1] += givptr[curr]; @@ -10478,7 +10472,7 @@ L80: *info = -3; } else if (*ldq < max(1,*n)) { *info = -5; - } else if ((*cutpnt < min(1,*n)) || (*cutpnt > *n)) { + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { *info = -8; } else if (*ldq2 < max(1,*n)) { *info = -12; @@ -10628,10 +10622,10 @@ L70: /* Record the appropriate Givens rotation */ ++(*givptr); - givcol[((*givptr) << (1)) + 1] = indxq[indx[jlam]]; - givcol[((*givptr) << (1)) + 2] = indxq[indx[j]]; - givnum[((*givptr) << (1)) + 1] = c__; - givnum[((*givptr) << (1)) + 2] = s; + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; zdrot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[ indx[j]] * q_dim1 + 1], &c__1, &c__, &s); t = d__[jlam] * c__ * c__ + d__[j] * s * s; @@ -10962,7 +10956,7 @@ L30: i2 = i__; } - if ((its == 10) || (its == 20)) { + if (its == 10 || its == 20) { /* Exceptional shift. */ @@ -10982,7 +10976,7 @@ L30: d__1 = h__[i__3].r; z__1.r = d__1 * h__[i__2].r, z__1.i = d__1 * h__[i__2].i; u.r = z__1.r, u.i = z__1.i; - if ((u.r != 0.) || (u.i != 0.)) { + if (u.r != 0. || u.i != 0.) 
{ i__2 = i__ - 1 + (i__ - 1) * h_dim1; z__2.r = h__[i__2].r - t.r, z__2.i = h__[i__2].i - t.i; z__1.r = z__2.r * .5, z__1.i = z__2.i * .5; @@ -11784,13 +11778,13 @@ L130: /* Function Body */ *info = 0; - if ((*icompq < 0) || (*icompq > 1)) { + if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*nl < 1) { *info = -2; } else if (*nr < 1) { *info = -3; - } else if ((*sqre < 0) || (*sqre > 1)) { + } else if (*sqre < 0 || *sqre > 1) { *info = -4; } @@ -11830,10 +11824,9 @@ L130: i__1 = *givptr; for (i__ = 1; i__ <= i__1; ++i__) { - zdrot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1], - ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[ - i__ + ((givnum_dim1) << (1))], &givnum[i__ + givnum_dim1]) - ; + zdrot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); /* L10: */ } @@ -11862,41 +11855,40 @@ L130: for (j = 1; j <= i__1; ++j) { diflj = difl[j]; dj = poles[j + poles_dim1]; - dsigj = -poles[j + ((poles_dim1) << (1))]; + dsigj = -poles[j + (poles_dim1 << 1)]; if (j < *k) { difrj = -difr[j + difr_dim1]; - dsigjp = -poles[j + 1 + ((poles_dim1) << (1))]; + dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; } - if ((z__[j] == 0.) || (poles[j + ((poles_dim1) << (1))] == 0.) - ) { + if (z__[j] == 0. || poles[j + (poles_dim1 << 1)] == 0.) { rwork[j] = 0.; } else { - rwork[j] = -poles[j + ((poles_dim1) << (1))] * z__[j] / - diflj / (poles[j + ((poles_dim1) << (1))] + dj); + rwork[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj + / (poles[j + (poles_dim1 << 1)] + dj); } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - if ((z__[i__] == 0.) || (poles[i__ + ((poles_dim1) << (1)) - ] == 0.)) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) 
{ rwork[i__] = 0.; } else { - rwork[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[ - i__] / (dlamc3_(&poles[i__ + ((poles_dim1) << - (1))], &dsigj) - diflj) / (poles[i__ + (( - poles_dim1) << (1))] + dj); + rwork[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigj) - diflj) / (poles[i__ + (poles_dim1 << + 1)] + dj); } /* L30: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { - if ((z__[i__] == 0.) || (poles[i__ + ((poles_dim1) << (1)) - ] == 0.)) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { rwork[i__] = 0.; } else { - rwork[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[ - i__] / (dlamc3_(&poles[i__ + ((poles_dim1) << - (1))], &dsigjp) + difrj) / (poles[i__ + (( - poles_dim1) << (1))] + dj); + rwork[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigjp) + difrj) / (poles[i__ + (poles_dim1 << + 1)] + dj); } /* L40: */ } @@ -11911,7 +11903,7 @@ L130: $ B( J, 1 ), LDB ) */ - i__ = *k + ((*nrhs) << (1)); + i__ = *k + (*nrhs << 1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = *k; @@ -11923,10 +11915,9 @@ L130: } /* L60: */ } - dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + ((*nrhs) << (1) - )], k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1], & - c__1); - i__ = *k + ((*nrhs) << (1)); + dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1], &c__1); + i__ = *k + (*nrhs << 1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = *k; @@ -11937,9 +11928,9 @@ L130: } /* L80: */ } - dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + ((*nrhs) << (1) - )], k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1 + * - nrhs], &c__1); + dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1 + *nrhs], + &c__1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = j + jcol * b_dim1; @@ -11976,23 +11967,22 @@ L130: } else { i__1 = *k; 
for (j = 1; j <= i__1; ++j) { - dsigj = poles[j + ((poles_dim1) << (1))]; + dsigj = poles[j + (poles_dim1 << 1)]; if (z__[j] == 0.) { rwork[j] = 0.; } else { rwork[j] = -z__[j] / difl[j] / (dsigj + poles[j + - poles_dim1]) / difr[j + ((difr_dim1) << (1))]; + poles_dim1]) / difr[j + (difr_dim1 << 1)]; } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[j] == 0.) { rwork[i__] = 0.; } else { - d__1 = -poles[i__ + 1 + ((poles_dim1) << (1))]; + d__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; rwork[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[ i__ + difr_dim1]) / (dsigj + poles[i__ + - poles_dim1]) / difr[i__ + ((difr_dim1) << (1)) - ]; + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; } /* L110: */ } @@ -12001,10 +11991,10 @@ L130: if (z__[j] == 0.) { rwork[i__] = 0.; } else { - d__1 = -poles[i__ + ((poles_dim1) << (1))]; + d__1 = -poles[i__ + (poles_dim1 << 1)]; rwork[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[ i__]) / (dsigj + poles[i__ + poles_dim1]) / - difr[i__ + ((difr_dim1) << (1))]; + difr[i__ + (difr_dim1 << 1)]; } /* L120: */ } @@ -12017,7 +12007,7 @@ L130: $ BX( J, 1 ), LDBX ) */ - i__ = *k + ((*nrhs) << (1)); + i__ = *k + (*nrhs << 1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = *k; @@ -12029,10 +12019,9 @@ L130: } /* L140: */ } - dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + ((*nrhs) << (1) - )], k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1], & - c__1); - i__ = *k + ((*nrhs) << (1)); + dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1], &c__1); + i__ = *k + (*nrhs << 1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = *k; @@ -12043,9 +12032,9 @@ L130: } /* L160: */ } - dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + ((*nrhs) << (1) - )], k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1 + * - nrhs], &c__1); + dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1 + *nrhs], + &c__1); i__2 = *nrhs; for (jcol = 1; jcol <= 
i__2; ++jcol) { i__3 = j + jcol * bx_dim1; @@ -12092,9 +12081,9 @@ L130: for (i__ = *givptr; i__ >= 1; --i__) { d__1 = -givnum[i__ + givnum_dim1]; - zdrot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1], - ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[ - i__ + ((givnum_dim1) << (1))], &d__1); + zdrot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &d__1); /* L200: */ } } @@ -12333,7 +12322,7 @@ L130: /* Function Body */ *info = 0; - if ((*icompq < 0) || (*icompq > 1)) { + if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*smlsiz < 3) { *info = -2; @@ -12408,7 +12397,7 @@ L130: $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) */ - j = (nl * *nrhs) << (1); + j = nl * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; @@ -12421,9 +12410,8 @@ L130: /* L20: */ } dgemm_("T", "N", &nl, nrhs, &nl, &c_b1015, &u[nlf + u_dim1], ldu, & - rwork[((nl * *nrhs) << (1)) + 1], &nl, &c_b324, &rwork[1], & - nl); - j = (nl * *nrhs) << (1); + rwork[(nl * *nrhs << 1) + 1], &nl, &c_b324, &rwork[1], &nl); + j = nl * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nl - 1; @@ -12435,8 +12423,8 @@ L130: /* L40: */ } dgemm_("T", "N", &nl, nrhs, &nl, &c_b1015, &u[nlf + u_dim1], ldu, & - rwork[((nl * *nrhs) << (1)) + 1], &nl, &c_b324, &rwork[nl * * - nrhs + 1], &nl); + rwork[(nl * *nrhs << 1) + 1], &nl, &c_b324, &rwork[nl * *nrhs + + 1], &nl); jreal = 0; jimag = nl * *nrhs; i__2 = *nrhs; @@ -12463,7 +12451,7 @@ L130: $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) */ - j = (nr * *nrhs) << (1); + j = nr * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; @@ -12476,9 +12464,8 @@ L130: /* L80: */ } dgemm_("T", "N", &nr, nrhs, &nr, &c_b1015, &u[nrf + u_dim1], ldu, & - rwork[((nr * *nrhs) << (1)) + 1], &nr, &c_b324, &rwork[1], & - nr); - j = (nr * *nrhs) << (1); + rwork[(nr * *nrhs 
<< 1) + 1], &nr, &c_b324, &rwork[1], &nr); + j = nr * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nr - 1; @@ -12490,8 +12477,8 @@ L130: /* L100: */ } dgemm_("T", "N", &nr, nrhs, &nr, &c_b1015, &u[nrf + u_dim1], ldu, & - rwork[((nr * *nrhs) << (1)) + 1], &nr, &c_b324, &rwork[nr * * - nrhs + 1], &nr); + rwork[(nr * *nrhs << 1) + 1], &nr, &c_b324, &rwork[nr * *nrhs + + 1], &nr); jreal = 0; jimag = nr * *nrhs; i__2 = *nrhs; @@ -12534,7 +12521,7 @@ L130: sqre = 0; for (lvl = nlvl; lvl >= 1; --lvl) { - lvl2 = ((lvl) << (1)) - 1; + lvl2 = (lvl << 1) - 1; /* find the first node LF and last node LL on @@ -12547,7 +12534,7 @@ L130: } else { i__1 = lvl - 1; lf = pow_ii(&c__2, &i__1); - ll = ((lf) << (1)) - 1; + ll = (lf << 1) - 1; } i__1 = ll; for (i__ = lf; i__ <= i__1; ++i__) { @@ -12583,7 +12570,7 @@ L170: j = 0; i__1 = nlvl; for (lvl = 1; lvl <= i__1; ++lvl) { - lvl2 = ((lvl) << (1)) - 1; + lvl2 = (lvl << 1) - 1; /* Find the first node LF and last node LL on @@ -12596,7 +12583,7 @@ L170: } else { i__2 = lvl - 1; lf = pow_ii(&c__2, &i__2); - ll = ((lf) << (1)) - 1; + ll = (lf << 1) - 1; } i__2 = lf; for (i__ = ll; i__ >= i__2; --i__) { @@ -12654,7 +12641,7 @@ L170: $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) */ - j = (nlp1 * *nrhs) << (1); + j = nlp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; @@ -12667,9 +12654,9 @@ L170: /* L210: */ } dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1015, &vt[nlf + vt_dim1], - ldu, &rwork[((nlp1 * *nrhs) << (1)) + 1], &nlp1, &c_b324, & - rwork[1], &nlp1); - j = (nlp1 * *nrhs) << (1); + ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b324, &rwork[ + 1], &nlp1); + j = nlp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nlf + nlp1 - 1; @@ -12681,8 +12668,8 @@ L170: /* L230: */ } dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1015, &vt[nlf + vt_dim1], - ldu, &rwork[((nlp1 * *nrhs) << (1)) + 1], &nlp1, &c_b324, & - rwork[nlp1 * *nrhs + 1], &nlp1); 
+ ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b324, &rwork[ + nlp1 * *nrhs + 1], &nlp1); jreal = 0; jimag = nlp1 * *nrhs; i__2 = *nrhs; @@ -12709,7 +12696,7 @@ L170: $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) */ - j = (nrp1 * *nrhs) << (1); + j = nrp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; @@ -12722,9 +12709,9 @@ L170: /* L270: */ } dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1015, &vt[nrf + vt_dim1], - ldu, &rwork[((nrp1 * *nrhs) << (1)) + 1], &nrp1, &c_b324, & - rwork[1], &nrp1); - j = (nrp1 * *nrhs) << (1); + ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b324, &rwork[ + 1], &nrp1); + j = nrp1 * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { i__3 = nrf + nrp1 - 1; @@ -12736,8 +12723,8 @@ L170: /* L290: */ } dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1015, &vt[nrf + vt_dim1], - ldu, &rwork[((nrp1 * *nrhs) << (1)) + 1], &nrp1, &c_b324, & - rwork[nrp1 * *nrhs + 1], &nrp1); + ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b324, &rwork[ + nrp1 * *nrhs + 1], &nrp1); jreal = 0; jimag = nrp1 * *nrhs; i__2 = *nrhs; @@ -12961,7 +12948,7 @@ L330: *info = -3; } else if (*nrhs < 1) { *info = -4; - } else if ((*ldb < 1) || (*ldb < *n)) { + } else if (*ldb < 1 || *ldb < *n) { *info = -8; } if (*info != 0) { @@ -12974,7 +12961,7 @@ L330: /* Set up the tolerance. */ - if ((*rcond <= 0.) || (*rcond >= 1.)) { + if (*rcond <= 0. || *rcond >= 1.) 
{ *rcond = eps; } @@ -13009,7 +12996,7 @@ L330: zdrot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & c__1, &cs, &sn); } else { - rwork[((i__) << (1)) - 1] = cs; + rwork[(i__ << 1) - 1] = cs; rwork[i__ * 2] = sn; } /* L10: */ @@ -13019,7 +13006,7 @@ L330: for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *n - 1; for (j = 1; j <= i__2; ++j) { - cs = rwork[((j) << (1)) - 1]; + cs = rwork[(j << 1) - 1]; sn = rwork[j * 2]; zdrot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ * b_dim1], &c__1, &cs, &sn); @@ -13204,12 +13191,12 @@ L330: vt = *smlsiz * *n + 1; difl = vt + smlszp * *n; difr = difl + nlvl * *n; - z__ = difr + ((nlvl * *n) << (1)); + z__ = difr + (nlvl * *n << 1); c__ = z__ + nlvl * *n; s = c__ + *n; poles = s + *n; - givnum = poles + ((nlvl) << (1)) * *n; - nrwork = givnum + ((nlvl) << (1)) * *n; + givnum = poles + (nlvl << 1) * *n; + nrwork = givnum + (nlvl << 1) * *n; bx = 1; irwrb = nrwork; @@ -13221,7 +13208,7 @@ L330: givptr = k + *n; perm = givptr + *n; givcol = perm + nlvl * *n; - iwk = givcol + ((nlvl * *n) << (1)); + iwk = givcol + (nlvl * *n << 1); st = 1; sqre = 0; @@ -13239,7 +13226,7 @@ L330: i__1 = nm1; for (i__ = 1; i__ <= i__1; ++i__) { - if (((d__1 = e[i__], abs(d__1)) < eps) || (i__ == nm1)) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { ++nsub; iwork[nsub] = st; @@ -13612,8 +13599,8 @@ doublereal zlange_(char *norm, integer *m, integer *n, doublecomplex *a, } /* L20: */ } - } else if ((lsame_(norm, "O")) || (*(unsigned char * - )norm == '1')) { + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { /* Find norm1(A). */ @@ -13655,8 +13642,7 @@ doublereal zlange_(char *norm, integer *m, integer *n, doublecomplex *a, value = max(d__1,d__2); /* L80: */ } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { /* Find normF(A). 
*/ @@ -13813,8 +13799,7 @@ doublereal zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, /* L40: */ } } - } else if (((lsame_(norm, "I")) || (lsame_(norm, - "O"))) || (*(unsigned char *)norm == '1')) { + } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { /* Find normI(A) ( = norm1(A), since A is hermitian). */ @@ -13862,8 +13847,7 @@ doublereal zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, /* L100: */ } } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { /* Find normF(A). */ @@ -14018,8 +14002,8 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, } /* L20: */ } - } else if ((lsame_(norm, "O")) || (*(unsigned char * - )norm == '1')) { + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { /* Find norm1(A). */ @@ -14065,8 +14049,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, value = max(d__1,d__2); /* L80: */ } - } else if ((lsame_(norm, "F")) || (lsame_(norm, - "E"))) { + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { /* Find normF(A). */ @@ -14176,7 +14159,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, --rwork; /* Function Body */ - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { return 0; } @@ -14331,7 +14314,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Form H * C */ - if ((tau->r != 0.) || (tau->i != 0.)) { + if (tau->r != 0. || tau->i != 0.) { /* w := C' * v */ @@ -14348,7 +14331,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Form C * H */ - if ((tau->r != 0.) || (tau->i != 0.)) { + if (tau->r != 0. || tau->i != 0.) 
{ /* w := C * v */ @@ -14495,7 +14478,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, work -= work_offset; /* Function Body */ - if ((*m <= 0) || (*n <= 0)) { + if (*m <= 0 || *n <= 0) { return 0; } @@ -16771,7 +16754,7 @@ L230: i__2 = j + c_dim1; z__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__2.i = v1.r * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); + i__3 = j + (c_dim1 << 1); z__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__3.i = v2.r * c__[i__3].i + v2.i * c__[i__3].r; z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; @@ -16782,8 +16765,8 @@ L230: sum.i * t1.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + sum.i * t2.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -16815,7 +16798,7 @@ L250: i__2 = j + c_dim1; z__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__3.i = v1.r * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); + i__3 = j + (c_dim1 << 1); z__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__4.i = v2.r * c__[i__3].i + v2.i * c__[i__3].r; z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i; @@ -16830,8 +16813,8 @@ L250: sum.i * t1.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + sum.i * t2.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -16874,7 +16857,7 @@ L270: i__2 = j + c_dim1; z__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__4.i = v1.r * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); + i__3 = j + (c_dim1 << 1); z__5.r = v2.r * 
c__[i__3].r - v2.i * c__[i__3].i, z__5.i = v2.r * c__[i__3].i + v2.i * c__[i__3].r; z__3.r = z__4.r + z__5.r, z__3.i = z__4.i + z__5.i; @@ -16882,7 +16865,7 @@ L270: z__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__6.i = v3.r * c__[i__4].i + v3.i * c__[i__4].r; z__2.r = z__3.r + z__6.r, z__2.i = z__3.i + z__6.i; - i__5 = j + ((c_dim1) << (2)); + i__5 = j + (c_dim1 << 2); z__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__7.i = v4.r * c__[i__5].i + v4.i * c__[i__5].r; z__1.r = z__2.r + z__7.r, z__1.i = z__2.i + z__7.i; @@ -16893,8 +16876,8 @@ L270: sum.i * t1.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + sum.i * t2.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -16905,8 +16888,8 @@ L270: sum.i * t3.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + sum.i * t4.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -16948,7 +16931,7 @@ L290: i__2 = j + c_dim1; z__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__5.i = v1.r * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); + i__3 = j + (c_dim1 << 1); z__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__6.i = v2.r * c__[i__3].i + v2.i * c__[i__3].r; z__4.r = z__5.r + z__6.r, z__4.i = z__5.i + z__6.i; @@ -16956,7 +16939,7 @@ L290: z__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__7.i = v3.r * c__[i__4].i + v3.i * c__[i__4].r; z__3.r = z__4.r + z__7.r, z__3.i = z__4.i + z__7.i; - i__5 = j + ((c_dim1) << (2)); + i__5 = j + (c_dim1 << 2); z__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, 
z__8.i = v4.r * c__[i__5].i + v4.i * c__[i__5].r; z__2.r = z__3.r + z__8.r, z__2.i = z__3.i + z__8.i; @@ -16971,8 +16954,8 @@ L290: sum.i * t1.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + sum.i * t2.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -16983,8 +16966,8 @@ L290: sum.i * t3.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + sum.i * t4.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17037,7 +17020,7 @@ L310: i__2 = j + c_dim1; z__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__6.i = v1.r * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); + i__3 = j + (c_dim1 << 1); z__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__7.i = v2.r * c__[i__3].i + v2.i * c__[i__3].r; z__5.r = z__6.r + z__7.r, z__5.i = z__6.i + z__7.i; @@ -17045,7 +17028,7 @@ L310: z__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__8.i = v3.r * c__[i__4].i + v3.i * c__[i__4].r; z__4.r = z__5.r + z__8.r, z__4.i = z__5.i + z__8.i; - i__5 = j + ((c_dim1) << (2)); + i__5 = j + (c_dim1 << 2); z__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__9.i = v4.r * c__[i__5].i + v4.i * c__[i__5].r; z__3.r = z__4.r + z__9.r, z__3.i = z__4.i + z__9.i; @@ -17064,8 +17047,8 @@ L310: sum.i * t1.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * 
t2.i + sum.i * t2.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17076,8 +17059,8 @@ L310: sum.i * t3.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + sum.i * t4.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17141,7 +17124,7 @@ L330: i__2 = j + c_dim1; z__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__7.i = v1.r * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); + i__3 = j + (c_dim1 << 1); z__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__8.i = v2.r * c__[i__3].i + v2.i * c__[i__3].r; z__6.r = z__7.r + z__8.r, z__6.i = z__7.i + z__8.i; @@ -17149,7 +17132,7 @@ L330: z__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__9.i = v3.r * c__[i__4].i + v3.i * c__[i__4].r; z__5.r = z__6.r + z__9.r, z__5.i = z__6.i + z__9.i; - i__5 = j + ((c_dim1) << (2)); + i__5 = j + (c_dim1 << 2); z__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__10.i = v4.r * c__[i__5].i + v4.i * c__[i__5].r; z__4.r = z__5.r + z__10.r, z__4.i = z__5.i + z__10.i; @@ -17172,8 +17155,8 @@ L330: sum.i * t1.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + sum.i * t2.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17184,8 +17167,8 @@ L330: sum.i * t3.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + sum.i * 
t4.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17260,7 +17243,7 @@ L350: i__2 = j + c_dim1; z__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__8.i = v1.r * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); + i__3 = j + (c_dim1 << 1); z__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__9.i = v2.r * c__[i__3].i + v2.i * c__[i__3].r; z__7.r = z__8.r + z__9.r, z__7.i = z__8.i + z__9.i; @@ -17268,7 +17251,7 @@ L350: z__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__10.i = v3.r * c__[i__4].i + v3.i * c__[i__4].r; z__6.r = z__7.r + z__10.r, z__6.i = z__7.i + z__10.i; - i__5 = j + ((c_dim1) << (2)); + i__5 = j + (c_dim1 << 2); z__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__11.i = v4.r * c__[i__5].i + v4.i * c__[i__5].r; z__5.r = z__6.r + z__11.r, z__5.i = z__6.i + z__11.i; @@ -17284,7 +17267,7 @@ L350: z__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__14.i = v7.r * c__[i__8].i + v7.i * c__[i__8].r; z__2.r = z__3.r + z__14.r, z__2.i = z__3.i + z__14.i; - i__9 = j + ((c_dim1) << (3)); + i__9 = j + (c_dim1 << 3); z__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__15.i = v8.r * c__[i__9].i + v8.i * c__[i__9].r; z__1.r = z__2.r + z__15.r, z__1.i = z__2.i + z__15.i; @@ -17295,8 +17278,8 @@ L350: sum.i * t1.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + sum.i * t2.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17307,8 +17290,8 @@ L350: sum.i * t3.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + sum.i * t4.r; z__1.r = 
c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17331,8 +17314,8 @@ L350: sum.i * t7.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (3)); - i__3 = j + ((c_dim1) << (3)); + i__2 = j + (c_dim1 << 3); + i__3 = j + (c_dim1 << 3); z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + sum.i * t8.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17394,7 +17377,7 @@ L370: i__2 = j + c_dim1; z__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__9.i = v1.r * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); + i__3 = j + (c_dim1 << 1); z__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__10.i = v2.r * c__[i__3].i + v2.i * c__[i__3].r; z__8.r = z__9.r + z__10.r, z__8.i = z__9.i + z__10.i; @@ -17402,7 +17385,7 @@ L370: z__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__11.i = v3.r * c__[i__4].i + v3.i * c__[i__4].r; z__7.r = z__8.r + z__11.r, z__7.i = z__8.i + z__11.i; - i__5 = j + ((c_dim1) << (2)); + i__5 = j + (c_dim1 << 2); z__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__12.i = v4.r * c__[i__5].i + v4.i * c__[i__5].r; z__6.r = z__7.r + z__12.r, z__6.i = z__7.i + z__12.i; @@ -17418,7 +17401,7 @@ L370: z__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__15.i = v7.r * c__[i__8].i + v7.i * c__[i__8].r; z__3.r = z__4.r + z__15.r, z__3.i = z__4.i + z__15.i; - i__9 = j + ((c_dim1) << (3)); + i__9 = j + (c_dim1 << 3); z__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__16.i = v8.r * c__[i__9].i + v8.i * c__[i__9].r; z__2.r = z__3.r + z__16.r, z__2.i = z__3.i + z__16.i; @@ -17433,8 +17416,8 @@ L370: sum.i * t1.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + sum.i * t2.r; z__1.r = c__[i__3].r - 
z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17445,8 +17428,8 @@ L370: sum.i * t3.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + sum.i * t4.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17469,8 +17452,8 @@ L370: sum.i * t7.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (3)); - i__3 = j + ((c_dim1) << (3)); + i__2 = j + (c_dim1 << 3); + i__3 = j + (c_dim1 << 3); z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + sum.i * t8.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17543,7 +17526,7 @@ L390: i__2 = j + c_dim1; z__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__10.i = v1.r * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + ((c_dim1) << (1)); + i__3 = j + (c_dim1 << 1); z__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__11.i = v2.r * c__[i__3].i + v2.i * c__[i__3].r; z__9.r = z__10.r + z__11.r, z__9.i = z__10.i + z__11.i; @@ -17551,7 +17534,7 @@ L390: z__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__12.i = v3.r * c__[i__4].i + v3.i * c__[i__4].r; z__8.r = z__9.r + z__12.r, z__8.i = z__9.i + z__12.i; - i__5 = j + ((c_dim1) << (2)); + i__5 = j + (c_dim1 << 2); z__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__13.i = v4.r * c__[i__5].i + v4.i * c__[i__5].r; z__7.r = z__8.r + z__13.r, z__7.i = z__8.i + z__13.i; @@ -17567,7 +17550,7 @@ L390: z__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__16.i = v7.r * c__[i__8].i + v7.i * c__[i__8].r; z__4.r = z__5.r + z__16.r, z__4.i = z__5.i + z__16.i; - i__9 = j + ((c_dim1) << (3)); + i__9 = j + (c_dim1 << 3); z__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__17.i = v8.r * c__[i__9].i + v8.i * c__[i__9].r; z__3.r = z__4.r + 
z__17.r, z__3.i = z__4.i + z__17.i; @@ -17586,8 +17569,8 @@ L390: sum.i * t1.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (1)); - i__3 = j + ((c_dim1) << (1)); + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + sum.i * t2.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17598,8 +17581,8 @@ L390: sum.i * t3.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (2)); - i__3 = j + ((c_dim1) << (2)); + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + sum.i * t4.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17622,8 +17605,8 @@ L390: sum.i * t7.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + ((c_dim1) << (3)); - i__3 = j + ((c_dim1) << (3)); + i__2 = j + (c_dim1 << 3); + i__3 = j + (c_dim1 << 3); z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + sum.i * t8.r; z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; @@ -17777,25 +17760,23 @@ L410: *info = -4; } else if (*m < 0) { *info = -6; - } else if (((*n < 0) || (itype == 4 && *n != *m)) || (itype == 5 && *n != - *m)) { + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { *info = -7; } else if (itype <= 3 && *lda < max(1,*m)) { *info = -9; } else if (itype >= 4) { /* Computing MAX */ i__1 = *m - 1; - if ((*kl < 0) || (*kl > max(i__1,0))) { + if (*kl < 0 || *kl > max(i__1,0)) { *info = -2; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = *n - 1; - if (((*ku < 0) || (*ku > max(i__1,0))) || (((itype == 4) || ( - itype == 5)) && *kl != *ku)) { + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { *info = -3; - } else if 
(((itype == 4 && *lda < *kl + 1) || (itype == 5 && *lda - < *ku + 1)) || (itype == 6 && *lda < ((*kl) << (1)) + *ku - + 1)) { + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { *info = -9; } } @@ -17809,7 +17790,7 @@ L410: /* Quick return if possible */ - if ((*n == 0) || (*m == 0)) { + if (*n == 0 || *m == 0) { return 0; } @@ -17955,7 +17936,7 @@ L10: k1 = *kl + *ku + 2; k2 = *kl + 1; - k3 = ((*kl) << (1)) + *ku + 1; + k3 = (*kl << 1) + *ku + 1; k4 = *kl + *ku + 1 + *m; i__1 = *n; for (j = 1; j <= i__1; ++j) { @@ -18257,13 +18238,13 @@ L10: /* Function Body */ info = 0; - if (! ((lsame_(side, "L")) || (lsame_(side, "R")))) { + if (! (lsame_(side, "L") || lsame_(side, "R"))) { info = 1; - } else if (! (((lsame_(pivot, "V")) || (lsame_( - pivot, "T"))) || (lsame_(pivot, "B")))) { + } else if (! (lsame_(pivot, "V") || lsame_(pivot, + "T") || lsame_(pivot, "B"))) { info = 2; - } else if (! ((lsame_(direct, "F")) || (lsame_( - direct, "B")))) { + } else if (! (lsame_(direct, "F") || lsame_(direct, + "B"))) { info = 3; } else if (*m < 0) { info = 4; @@ -18279,7 +18260,7 @@ L10: /* Quick return if possible */ - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { return 0; } if (lsame_(side, "L")) { @@ -18292,7 +18273,7 @@ L10: for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.) || (stemp != 0.)) { + if (ctemp != 1. || stemp != 0.) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = j + 1 + i__ * a_dim1; @@ -18322,7 +18303,7 @@ L10: for (j = *m - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.) || (stemp != 0.)) { + if (ctemp != 1. || stemp != 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = j + 1 + i__ * a_dim1; @@ -18355,7 +18336,7 @@ L10: for (j = 2; j <= i__1; ++j) { ctemp = c__[j - 1]; stemp = s[j - 1]; - if ((ctemp != 1.) || (stemp != 0.)) { + if (ctemp != 1. || stemp != 0.) 
{ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = j + i__ * a_dim1; @@ -18385,7 +18366,7 @@ L10: for (j = *m; j >= 2; --j) { ctemp = c__[j - 1]; stemp = s[j - 1]; - if ((ctemp != 1.) || (stemp != 0.)) { + if (ctemp != 1. || stemp != 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = j + i__ * a_dim1; @@ -18418,7 +18399,7 @@ L10: for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.) || (stemp != 0.)) { + if (ctemp != 1. || stemp != 0.) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = j + i__ * a_dim1; @@ -18448,7 +18429,7 @@ L10: for (j = *m - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.) || (stemp != 0.)) { + if (ctemp != 1. || stemp != 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = j + i__ * a_dim1; @@ -18486,7 +18467,7 @@ L10: for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.) || (stemp != 0.)) { + if (ctemp != 1. || stemp != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j + 1) * a_dim1; @@ -18516,7 +18497,7 @@ L10: for (j = *n - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.) || (stemp != 0.)) { + if (ctemp != 1. || stemp != 0.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__ + (j + 1) * a_dim1; @@ -18549,7 +18530,7 @@ L10: for (j = 2; j <= i__1; ++j) { ctemp = c__[j - 1]; stemp = s[j - 1]; - if ((ctemp != 1.) || (stemp != 0.)) { + if (ctemp != 1. || stemp != 0.) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * a_dim1; @@ -18579,7 +18560,7 @@ L10: for (j = *n; j >= 2; --j) { ctemp = c__[j - 1]; stemp = s[j - 1]; - if ((ctemp != 1.) || (stemp != 0.)) { + if (ctemp != 1. || stemp != 0.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__ + j * a_dim1; @@ -18612,7 +18593,7 @@ L10: for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.) || (stemp != 0.)) { + if (ctemp != 1. || stemp != 0.) 
{ i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * a_dim1; @@ -18642,7 +18623,7 @@ L10: for (j = *n - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; - if ((ctemp != 1.) || (stemp != 0.)) { + if (ctemp != 1. || stemp != 0.) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__ + j * a_dim1; @@ -18882,7 +18863,7 @@ L10: return 0; } - n32 = (*n / 32) << (5); + n32 = *n / 32 << 5; if (n32 != 0) { i__1 = n32; for (j = 1; j <= i__1; j += 32) { @@ -20504,17 +20485,20 @@ L210: } /* zlatrs_ */ -/* Subroutine */ int zlauu2_(char *uplo, integer *n, doublecomplex *a, +/* Subroutine */ int zpotf2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; - doublecomplex z__1; + doublecomplex z__1, z__2; + + /* Builtin functions */ + double sqrt(doublereal); /* Local variables */ - static integer i__; - static doublereal aii; + static integer j; + static doublereal ajj; extern logical lsame_(char *, char *); extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); @@ -20528,7 +20512,7 @@ L210: /* - -- LAPACK auxiliary routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 @@ -20537,35 +20521,39 @@ L210: Purpose ======= - ZLAUU2 computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. + ZPOTF2 computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. - If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. 
+ The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. - This is the unblocked form of the algorithm, calling Level 2 BLAS. + This is the unblocked version of the algorithm, calling Level 2 BLAS. Arguments ========= UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored. = 'U': Upper triangular = 'L': Lower triangular N (input) INTEGER - The order of the triangular factor U or L. N >= 0. + The order of the matrix A. N >= 0. A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). @@ -20573,6 +20561,9 @@ L210: INFO (output) INTEGER = 0: successful exit < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. 
===================================================================== @@ -20597,7 +20588,7 @@ L210: } if (*info != 0) { i__1 = -(*info); - xerbla_("ZLAUU2", &i__1); + xerbla_("ZPOTF2", &i__1); return 0; } @@ -20609,81 +20600,113 @@ L210: if (upper) { -/* Compute the product U * U'. */ +/* Compute the Cholesky factorization A = U'*U. */ i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - aii = a[i__2].r; - if (i__ < *n) { - i__2 = i__ + i__ * a_dim1; - i__3 = *n - i__; - zdotc_(&z__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &a[ - i__ + (i__ + 1) * a_dim1], lda); - d__1 = aii * aii + z__1.r; - a[i__2].r = d__1, a[i__2].i = 0.; - i__2 = *n - i__; - zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__; - z__1.r = aii, z__1.i = 0.; - zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - z__1, &a[i__ * a_dim1 + 1], &c__1); - i__2 = *n - i__; - zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); - } else { - zdscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + i__3 = j - 1; + zdotc_(&z__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1] + , &c__1); + z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; + ajj = z__1.r; + if (ajj <= 0.) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + +/* Compute elements J+1:N of row J. */ + + if (j < *n) { + i__2 = j - 1; + zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + i__3 = *n - j; + z__1.r = -1., z__1.i = -0.; + zgemv_("Transpose", &i__2, &i__3, &z__1, &a[(j + 1) * a_dim1 + + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b60, &a[j + ( + j + 1) * a_dim1], lda); + i__2 = j - 1; + zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = *n - j; + d__1 = 1. 
/ ajj; + zdscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); } /* L10: */ } - } else { -/* Compute the product L' * L. */ +/* Compute the Cholesky factorization A = L*L'. */ i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + i__ * a_dim1; - aii = a[i__2].r; - if (i__ < *n) { - i__2 = i__ + i__ * a_dim1; - i__3 = *n - i__; - zdotc_(&z__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[ - i__ + 1 + i__ * a_dim1], &c__1); - d__1 = aii * aii + z__1.r; - a[i__2].r = d__1, a[i__2].i = 0.; - i__2 = i__ - 1; - zlacgv_(&i__2, &a[i__ + a_dim1], lda); - i__2 = *n - i__; - i__3 = i__ - 1; - z__1.r = aii, z__1.i = 0.; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + - 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - z__1, &a[i__ + a_dim1], lda); - i__2 = i__ - 1; - zlacgv_(&i__2, &a[i__ + a_dim1], lda); - } else { - zdscal_(&i__, &aii, &a[i__ + a_dim1], lda); + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + i__3 = j - 1; + zdotc_(&z__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); + z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; + ajj = z__1.r; + if (ajj <= 0.) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + +/* Compute elements J+1:N of column J. */ + + if (j < *n) { + i__2 = j - 1; + zlacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + i__3 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[j + 1 + a_dim1] + , lda, &a[j + a_dim1], lda, &c_b60, &a[j + 1 + j * + a_dim1], &c__1); + i__2 = j - 1; + zlacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + d__1 = 1. 
/ ajj; + zdscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); } /* L20: */ } } + goto L40; +L30: + *info = j; + +L40: return 0; -/* End of ZLAUU2 */ +/* End of ZPOTF2 */ -} /* zlauu2_ */ +} /* zpotf2_ */ -/* Subroutine */ int zlauum_(char *uplo, integer *n, doublecomplex *a, +/* Subroutine */ int zpotrf_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; /* Local variables */ - static integer i__, ib, nb; + static integer j, jb, nb; extern logical lsame_(char *, char *); extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, @@ -20692,16 +20715,16 @@ L210: integer *, doublereal *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *); static logical upper; - extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, + extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *), - zlauu2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); + zpotf2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); /* - -- LAPACK auxiliary routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 @@ -20710,42 +20733,47 @@ L210: Purpose ======= - ZLAUUM computes the product U * U' or L' * L, where the triangular - factor U or L is stored in the upper or lower triangular part of - the array A. + ZPOTRF computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. 
- If UPLO = 'U' or 'u' then the upper triangle of the result is stored, - overwriting the factor U in A. - If UPLO = 'L' or 'l' then the lower triangle of the result is stored, - overwriting the factor L in A. + The factorization has the form + A = U**H * U, if UPLO = 'U', or + A = L * L**H, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. - This is the blocked form of the algorithm, calling Level 3 BLAS. + This is the block version of the algorithm, calling Level 3 BLAS. Arguments ========= UPLO (input) CHARACTER*1 - Specifies whether the triangular factor stored in the array A - is upper or lower triangular: - = 'U': Upper triangular - = 'L': Lower triangular + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. N (input) INTEGER - The order of the triangular factor U or L. N >= 0. + The order of the matrix A. N >= 0. A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the triangular factor U or L. - On exit, if UPLO = 'U', the upper triangle of A is - overwritten with the upper triangle of the product U * U'; - if UPLO = 'L', the lower triangle of A is overwritten with - the lower triangle of the product L' * L. + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**H*U or A = L*L**H. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). 
INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. ===================================================================== @@ -20770,7 +20798,7 @@ L210: } if (*info != 0) { i__1 = -(*info); - xerbla_("ZLAUUM", &i__1); + xerbla_("ZPOTRF", &i__1); return 0; } @@ -20782,164 +20810,285 @@ L210: /* Determine the block size for this environment. */ - nb = ilaenv_(&c__1, "ZLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + nb = ilaenv_(&c__1, "ZPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); + if (nb <= 1 || nb >= *n) { - if ((nb <= 1) || (nb >= *n)) { - -/* Use unblocked code */ +/* Use unblocked code. */ - zlauu2_(uplo, n, &a[a_offset], lda, info); + zpotf2_(uplo, n, &a[a_offset], lda, info); } else { -/* Use blocked code */ +/* Use blocked code. */ if (upper) { -/* Compute the product U * U'. */ +/* Compute the Cholesky factorization A = U'*U. */ i__1 = *n; i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - ztrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", & - i__3, &ib, &c_b60, &a[i__ + i__ * a_dim1], lda, &a[ - i__ * a_dim1 + 1], lda); - zlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - zgemm_("No transpose", "Conjugate transpose", &i__3, &ib, - &i__4, &c_b60, &a[(i__ + ib) * a_dim1 + 1], lda, & - a[i__ + (i__ + ib) * a_dim1], lda, &c_b60, &a[i__ - * a_dim1 + 1], lda); - i__3 = *n - i__ - ib + 1; - zherk_("Upper", "No transpose", &ib, &i__3, &c_b1015, &a[ - i__ + (i__ + ib) * a_dim1], lda, &c_b1015, &a[i__ - + i__ * a_dim1], lda); + for (j = 1; i__2 < 0 ? 
j >= i__1 : j <= i__1; j += i__2) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + zherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1294, & + a[j * a_dim1 + 1], lda, &c_b1015, &a[j + j * a_dim1], + lda); + zpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; } -/* L10: */ - } - } else { + if (j + jb <= *n) { -/* Compute the product L' * L. */ +/* Compute the current block row. */ - i__2 = *n; - i__1 = nb; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { -/* Computing MIN */ - i__3 = nb, i__4 = *n - i__ + 1; - ib = min(i__3,i__4); - i__3 = i__ - 1; - ztrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", & - ib, &i__3, &c_b60, &a[i__ + i__ * a_dim1], lda, &a[ - i__ + a_dim1], lda); - zlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); - if (i__ + ib <= *n) { - i__3 = i__ - 1; - i__4 = *n - i__ - ib + 1; - zgemm_("Conjugate transpose", "No transpose", &ib, &i__3, - &i__4, &c_b60, &a[i__ + ib + i__ * a_dim1], lda, & - a[i__ + ib + a_dim1], lda, &c_b60, &a[i__ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("Conjugate transpose", "No transpose", &jb, &i__3, + &i__4, &z__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) + * a_dim1 + 1], lda, &c_b60, &a[j + (j + jb) * a_dim1], lda); - i__3 = *n - i__ - ib + 1; - zherk_("Lower", "Conjugate transpose", &ib, &i__3, & - c_b1015, &a[i__ + ib + i__ * a_dim1], lda, & - c_b1015, &a[i__ + i__ * a_dim1], lda); + i__3 = *n - j - jb + 1; + ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", + &jb, &i__3, &c_b60, &a[j + j * a_dim1], lda, &a[ + j + (j + jb) * a_dim1], lda); } -/* L20: */ +/* L10: */ } - } - } - return 0; + } else { -/* End of ZLAUUM */ +/* Compute the Cholesky factorization A = L*L'. */ -} /* zlauum_ */ + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? 
j >= i__2 : j <= i__2; j += i__1) { -/* Subroutine */ int zpotf2_(char *uplo, integer *n, doublecomplex *a, - integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1; - doublecomplex z__1, z__2; +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. - /* Builtin functions */ + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + zherk_("Lower", "No transpose", &jb, &i__3, &c_b1294, &a[j + + a_dim1], lda, &c_b1015, &a[j + j * a_dim1], lda); + zpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", &i__3, &jb, + &i__4, &z__1, &a[j + jb + a_dim1], lda, &a[j + + a_dim1], lda, &c_b60, &a[j + jb + j * a_dim1], + lda); + i__3 = *n - j - jb + 1; + ztrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" + , &i__3, &jb, &c_b60, &a[j + j * a_dim1], lda, &a[ + j + jb + j * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of ZPOTRF */ + +} /* zpotrf_ */ + +/* Subroutine */ int zstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); double sqrt(doublereal); /* Local variables */ - static integer j; - static doublereal ajj; + static integer i__, j, k, m; + static doublereal p; + static integer ii, ll, end, lgn; + static doublereal eps, tiny; extern logical lsame_(char *, char 
*); - extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *); - extern /* Subroutine */ int zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *); - static logical upper; - extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( - integer *, doublereal *, doublecomplex *, integer *), zlacgv_( + static integer lwmin, start; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlaed0_(integer *, integer *, + doublereal *, doublereal *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, integer *, integer *); + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dstedc_(char *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *), dlaset_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlacrm_(integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, doublecomplex *, integer *, + doublereal *); + static integer liwmin, icompz; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); + static doublereal orgnrm; + static integer lrwmin; + static logical lquery; + static integer smlsiz; + extern /* 
Subroutine */ int zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *); /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + June 30, 1999 Purpose ======= - ZPOTF2 computes the Cholesky factorization of a complex Hermitian - positive definite matrix A. - - The factorization has the form - A = U' * U , if UPLO = 'U', or - A = L * L', if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. + ZSTEDC computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this + matrix to tridiagonal form. - This is the unblocked version of the algorithm, calling Level 2 BLAS. + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See DLAED3 for details. Arguments ========= - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - Hermitian matrix A is stored. - = 'U': Upper triangular - = 'L': Lower triangular + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'I': Compute eigenvectors of tridiagonal matrix also. + = 'V': Compute eigenvectors of original Hermitian matrix + also. On entry, Z contains the unitary matrix used + to reduce the original matrix to tridiagonal form. N (input) INTEGER - The order of the matrix A. N >= 0. + The dimension of the symmetric tridiagonal matrix. N >= 0. 
- A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - n by n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U'*U or A = L*L'. + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the subdiagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1. + If eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. + If COMPZ = 'V' and N > 1, LWORK must be at least N*N. 
+ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + RWORK (workspace/output) DOUBLE PRECISION array, + dimension (LRWORK) + On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. + + LRWORK (input) INTEGER + The dimension of the array RWORK. + If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. + If COMPZ = 'V' and N > 1, LRWORK must be at least + 1 + 3*N + 2*N*lg N + 3*N**2 , + where lg( N ) = smallest integer k such + that 2**k >= N. + If COMPZ = 'I' and N > 1, LRWORK must be at least + 1 + 4*N + 2*N**2 . + + If LRWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the RWORK array, + returns this value as the first entry of the RWORK array, and + no error message related to LRWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. + If COMPZ = 'V' or N > 1, LIWORK must be at least + 6 + 6*N + 5*N*lg N. + If COMPZ = 'I' or N > 1, LIWORK must be at least + 3 + 5*N . + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, the leading minor of order k is not - positive definite, and the factorization could not be - completed. + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. 
+ > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA ===================================================================== @@ -20948,208 +21097,403 @@ L210: */ /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --rwork; + --iwork; /* Function Body */ *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZPOTF2", &i__1); - return 0; - } - -/* Quick return if possible */ + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; - if (*n == 0) { - return 0; - } - - if (upper) { + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (*n <= 1 || icompz <= 0) { + lwmin = 1; + liwmin = 1; + lrwmin = 1; + } else { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { + lwmin = *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { + lwmin = 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = (*n << 2) + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } else if (*lwork < lwmin && ! 
lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } -/* Compute the Cholesky factorization A = U'*U. */ + if (*info == 0) { + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZSTEDC", &i__1); + return 0; + } else if (lquery) { + return 0; + } -/* Compute U(J,J) and test for non-positive-definiteness. */ +/* Quick return if possible */ - i__2 = j + j * a_dim1; - d__1 = a[i__2].r; - i__3 = j - 1; - zdotc_(&z__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1] - , &c__1); - z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; - ajj = z__1.r; - if (ajj <= 0.) { - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.; - goto L30; - } - ajj = sqrt(ajj); - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.; + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } -/* Compute elements J+1:N of row J. */ + smlsiz = ilaenv_(&c__9, "ZSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); - if (j < *n) { - i__2 = j - 1; - zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); - i__2 = j - 1; - i__3 = *n - j; - z__1.r = -1., z__1.i = -0.; - zgemv_("Transpose", &i__2, &i__3, &z__1, &a[(j + 1) * a_dim1 - + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b60, &a[j + ( - j + 1) * a_dim1], lda); - i__2 = j - 1; - zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); - i__2 = *n - j; - d__1 = 1. / ajj; - zdscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); - } -/* L10: */ - } - } else { +/* + If the following conditional clause is removed, then the routine + will use the Divide and Conquer routine to compute only the + eigenvalues, which requires (3N + 3N**2) real workspace and + (2 + 5N + 2N lg(N)) integer workspace. 
+ Since on many architectures DSTERF is much faster than any other + algorithm for finding eigenvalues only, it is used here + as the default. -/* Compute the Cholesky factorization A = L*L'. */ + If COMPZ = 'N', use DSTERF to compute the eigenvalues. +*/ - i__1 = *n; - for (j = 1; j <= i__1; ++j) { + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + return 0; + } -/* Compute L(J,J) and test for non-positive-definiteness. */ +/* + If N is smaller than the minimum divide size (SMLSIZ+1), then + solve the problem with another solver. +*/ - i__2 = j + j * a_dim1; - d__1 = a[i__2].r; - i__3 = j - 1; - zdotc_(&z__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); - z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; - ajj = z__1.r; - if (ajj <= 0.) { - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.; - goto L30; - } - ajj = sqrt(ajj); - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.; + if (*n <= smlsiz) { + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + return 0; + } else if (icompz == 2) { + zsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); + return 0; + } else { + zsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); + return 0; + } + } -/* Compute elements J+1:N of column J. */ +/* If COMPZ = 'I', we simply call DSTEDC instead. */ - if (j < *n) { - i__2 = j - 1; - zlacgv_(&i__2, &a[j + a_dim1], lda); - i__2 = *n - j; - i__3 = j - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &a[j + 1 + a_dim1] - , lda, &a[j + a_dim1], lda, &c_b60, &a[j + 1 + j * - a_dim1], &c__1); - i__2 = j - 1; - zlacgv_(&i__2, &a[j + a_dim1], lda); - i__2 = *n - j; - d__1 = 1. 
/ ajj; - zdscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + if (icompz == 2) { + dlaset_("Full", n, n, &c_b324, &c_b1015, &rwork[1], n); + ll = *n * *n + 1; + i__1 = *lrwork - ll + 1; + dstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & + iwork[1], liwork, info); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * z_dim1; + i__4 = (j - 1) * *n + i__; + z__[i__3].r = rwork[i__4], z__[i__3].i = 0.; +/* L10: */ } /* L20: */ } + return 0; } - goto L40; -L30: - *info = j; +/* + From now on, only option left to be handled is COMPZ = 'V', + i.e. ICOMPZ = 1. -L40: - return 0; + Scale. +*/ -/* End of ZPOTF2 */ + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + return 0; + } -} /* zpotf2_ */ + eps = EPSILON; -/* Subroutine */ int zpotrf_(char *uplo, integer *n, doublecomplex *a, - integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublecomplex z__1; + start = 1; - /* Local variables */ - static integer j, jb, nb; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, - integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), zherk_(char *, char *, integer *, - integer *, doublereal *, doublecomplex *, integer *, doublereal *, - doublecomplex *, integer *); - static logical upper; - extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, - integer *, integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *), - zpotf2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); +/* while ( START <= N ) */ +L30: + if (start <= *n) { /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + Let END be the position of the next subdiagonal entry such that + E( END ) <= TINY or END = N if no such subdiagonal exists. The + matrix identified by the elements between START and END + constitutes an independent sub-problem. +*/ + end = start; +L40: + if (end < *n) { + tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = + d__[end + 1], abs(d__2))); + if ((d__1 = e[end], abs(d__1)) > tiny) { + ++end; + goto L40; + } + } - Purpose - ======= +/* (Sub) Problem determined. Compute its size and solve it. */ - ZPOTRF computes the Cholesky factorization of a complex Hermitian - positive definite matrix A. + m = end - start + 1; + if (m > smlsiz) { + *info = smlsiz; - The factorization has the form - A = U**H * U, if UPLO = 'U', or - A = L * L**H, if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. +/* Scale. */ - This is the block version of the algorithm, calling Level 3 BLAS. + orgnrm = dlanst_("M", &m, &d__[start], &e[start]); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &i__1, &c__1, &e[ + start], &i__2, info); + + zlaed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1], + ldz, &work[1], n, &rwork[1], &iwork[1], info); + if (*info > 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m + + 1) + start - 1; + return 0; + } + +/* Scale back. 
*/ + + dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, &m, &c__1, &d__[ + start], &m, info); + + } else { + dsteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m * + m + 1], info); + zlacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & + work[1], n, &rwork[m * m + 1]); + zlacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz); + if (*info > 0) { + *info = start * (*n + 1) + end; + return 0; + } + } + + start = end + 1; + goto L30; + } + +/* + endwhile + + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. +*/ + + if (m != *n) { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L50: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L60: */ + } + } + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + return 0; + +/* End of ZSTEDC */ + +} /* zstedc_ */ + +/* Subroutine */ int zsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static doublereal b, c__, f, g; + static integer i__, j, k, l, m; + static doublereal p, r__, s; + static integer l1, ii, mm, lm1, mm1, nm1; + static doublereal rt1, rt2, eps; + static integer lsv; + static doublereal tst, eps2; + static integer lend, jtot; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, 
doublereal *, doublereal *); + extern logical lsame_(char *, char *); + static doublereal anorm; + extern /* Subroutine */ int zlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublecomplex *, integer *), zswap_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *), dlaev2_(doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + static integer lendm1, lendp1; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + static doublereal safmin; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + static doublereal safmax; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static integer lendsv; + static doublereal ssfmin; + static integer nmaxit, icompz; + static doublereal ssfmax; + extern /* Subroutine */ int zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZSTEQR computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the implicit QL or QR method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this + matrix to tridiagonal form. Arguments ========= - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. 
+ = 'V': Compute eigenvalues and eigenvectors of the original + Hermitian matrix. On entry, Z must contain the + unitary matrix used to reduce the original matrix + to tridiagonal form. + = 'I': Compute eigenvalues and eigenvectors of the + tridiagonal matrix. Z is initialized to the identity + matrix. N (input) INTEGER - The order of the matrix A. N >= 0. + The order of the matrix. N >= 0. - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U**H*U or A = L*L**H. + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). + Z (input/output) COMPLEX*16 array, dimension (LDZ, N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1, and if + eigenvectors are desired, then LDZ >= max(1,N). 
+ + WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) + If COMPZ = 'N', then WORK is not referenced. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the leading minor of order i is not - positive definite, and the factorization could not be - completed. + > 0: the algorithm has failed to find all the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero; on exit, D + and E contain the elements of a symmetric tridiagonal + matrix which is unitarily similar to the original + matrix. ===================================================================== @@ -21158,23 +21502,35 @@ L40: */ /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; /* Function Body */ *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { *info = -1; } else if (*n < 0) { *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; } if (*info != 0) { i__1 = -(*info); - xerbla_("ZPOTRF", &i__1); + xerbla_("ZSTEQR", &i__1); return 0; } @@ -21184,2307 +21540,951 @@ L40: return 0; } -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "ZPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - if ((nb <= 1) || (nb >= *n)) { + if (*n == 1) { + if (icompz == 2) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } -/* Use unblocked code. */ +/* Determine the unit roundoff and over/underflow thresholds. */ - zpotf2_(uplo, n, &a[a_offset], lda, info); - } else { - -/* Use blocked code. 
*/ - - if (upper) { - -/* Compute the Cholesky factorization A = U'*U. */ - - i__1 = *n; - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + eps = EPSILON; +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; /* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN + Compute the eigenvalues and eigenvectors of the tridiagonal + matrix. */ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - zherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1294, & - a[j * a_dim1 + 1], lda, &c_b1015, &a[j + j * a_dim1], - lda); - zpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block row. */ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - z__1.r = -1., z__1.i = -0.; - zgemm_("Conjugate transpose", "No transpose", &jb, &i__3, - &i__4, &z__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) - * a_dim1 + 1], lda, &c_b60, &a[j + (j + jb) * - a_dim1], lda); - i__3 = *n - j - jb + 1; - ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", - &jb, &i__3, &c_b60, &a[j + j * a_dim1], lda, &a[ - j + (j + jb) * a_dim1], lda); - } -/* L10: */ - } - - } else { -/* Compute the Cholesky factorization A = L*L'. */ + if (icompz == 2) { + zlaset_("Full", n, n, &c_b59, &c_b60, &z__[z_offset], ldz); + } - i__2 = *n; - i__1 = nb; - for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + nmaxit = *n * 30; + jtot = 0; /* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. 
*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - zherk_("Lower", "No transpose", &jb, &i__3, &c_b1294, &a[j + - a_dim1], lda, &c_b1015, &a[j + j * a_dim1], lda); - zpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { -/* Compute the current block column. */ + l1 = 1; + nm1 = *n - 1; - i__3 = *n - j - jb + 1; - i__4 = j - 1; - z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "Conjugate transpose", &i__3, &jb, - &i__4, &z__1, &a[j + jb + a_dim1], lda, &a[j + - a_dim1], lda, &c_b60, &a[j + jb + j * a_dim1], - lda); - i__3 = *n - j - jb + 1; - ztrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" - , &i__3, &jb, &c_b60, &a[j + j * a_dim1], lda, &a[ - j + jb + j * a_dim1], lda); - } -/* L20: */ +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (d__1 = e[m], abs(d__1)); + if (tst == 0.) { + goto L30; + } + if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m + + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; } +/* L20: */ } } - goto L40; + m = *n; L30: - *info = *info + j - 1; - -L40: - return 0; + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } -/* End of ZPOTRF */ +/* Scale submatrix in rows and columns L to LEND */ -} /* zpotrf_ */ + i__1 = lend - l + 1; + anorm = dlanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm == 0.) 
{ + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } -/* Subroutine */ int zpotri_(char *uplo, integer *n, doublecomplex *a, - integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1; +/* Choose between QL and QR iteration */ - /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *), zlauum_( - char *, integer *, doublecomplex *, integer *, integer *), - ztrtri_(char *, char *, integer *, doublecomplex *, integer *, - integer *); + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + if (lend > l) { /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 - + QL Iteration - Purpose - ======= + Look for small subdiagonal element. +*/ - ZPOTRI computes the inverse of a complex Hermitian positive definite - matrix A using the Cholesky factorization A = U**H*U or A = L*L**H - computed by ZPOTRF. +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + + 1], abs(d__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } - Arguments - ========= + m = lend; - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. 
+L60: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L80; + } - N (input) INTEGER - The order of the matrix A. N >= 0. +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. +*/ - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the triangular factor U or L from the Cholesky - factorization A = U**H*U or A = L*L**H, as computed by - ZPOTRF. - On exit, the upper or lower triangle of the (Hermitian) - inverse of A, overwriting the input factor U or L. + if (m == l + 1) { + if (icompz > 0) { + dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + zlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). + if (jtot == nmaxit) { + goto L140; + } + ++jtot; - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the (i,i) element of the factor U or L is - zero, and the inverse could not be computed. +/* Form shift. */ - ===================================================================== + g = (d__[l + 1] - p) / (e[l] * 2.); + r__ = dlapy2_(&g, &c_b1015); + g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); + s = 1.; + c__ = 1.; + p = 0.; - Test the input parameters. -*/ +/* Inner loop */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - - /* Function Body */ - *info = 0; - if (! lsame_(uplo, "U") && ! 
lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*n)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZPOTRI", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Invert the triangular Cholesky factor U or L. */ - - ztrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); - if (*info > 0) { - return 0; - } - -/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ - - zlauum_(uplo, n, &a[a_offset], lda, info); - - return 0; - -/* End of ZPOTRI */ - -} /* zpotri_ */ - -/* Subroutine */ int zpotrs_(char *uplo, integer *n, integer *nrhs, - doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, - integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; - - /* Local variables */ - extern logical lsame_(char *, char *); - static logical upper; - extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, - integer *, integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *), - xerbla_(char *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZPOTRS solves a system of linear equations A*X = B with a Hermitian - positive definite matrix A using the Cholesky factorization - A = U**H*U or A = L*L**H computed by ZPOTRF. - - Arguments - ========= - - UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. - - N (input) INTEGER - The order of the matrix A. N >= 0. - - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. - - A (input) COMPLEX*16 array, dimension (LDA,N) - The triangular factor U or L from the Cholesky factorization - A = U**H*U or A = L*L**H, as computed by ZPOTRF. 
- - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). - - B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. - - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - ===================================================================== - - - Test the input parameters. -*/ - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZPOTRS", &i__1); - return 0; - } - -/* Quick return if possible */ - - if ((*n == 0) || (*nrhs == 0)) { - return 0; - } - - if (upper) { - -/* - Solve A*X = B where A = U'*U. - - Solve U'*X = B, overwriting B with X. -*/ - - ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, & - c_b60, &a[a_offset], lda, &b[b_offset], ldb); - -/* Solve U*X = B, overwriting B with X. */ - - ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b60, & - a[a_offset], lda, &b[b_offset], ldb); - } else { - -/* - Solve A*X = B where A = L*L'. - - Solve L*X = B, overwriting B with X. -*/ - - ztrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b60, & - a[a_offset], lda, &b[b_offset], ldb); - -/* Solve L'*X = B, overwriting B with X. 
*/ - - ztrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, & - c_b60, &a[a_offset], lda, &b[b_offset], ldb); - } - - return 0; - -/* End of ZPOTRS */ - -} /* zpotrs_ */ - -/* Subroutine */ int zstedc_(char *compz, integer *n, doublereal *d__, - doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work, - integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, - integer *liwork, integer *info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2, i__3, i__4; - doublereal d__1, d__2; - - /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j, k, m; - static doublereal p; - static integer ii, ll, end, lgn; - static doublereal eps, tiny; - extern logical lsame_(char *, char *); - static integer lwmin, start; - extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *), zlaed0_(integer *, integer *, - doublereal *, doublereal *, doublecomplex *, integer *, - doublecomplex *, integer *, doublereal *, integer *, integer *); - - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dstedc_(char *, integer *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, integer *), dlaset_( - char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, - integer *), zlacrm_(integer *, integer *, doublecomplex *, - integer *, doublereal *, integer *, doublecomplex *, integer *, - doublereal *); - static 
integer liwmin, icompz; - extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, - integer *, doublecomplex *, integer *); - static doublereal orgnrm; - static integer lrwmin; - static logical lquery; - static integer smlsiz; - extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, - doublereal *, doublecomplex *, integer *, doublereal *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZSTEDC computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the divide and conquer method. - The eigenvectors of a full or band complex Hermitian matrix can also - be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this - matrix to tridiagonal form. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See DLAED3 for details. - - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'I': Compute eigenvectors of tridiagonal matrix also. - = 'V': Compute eigenvectors of original Hermitian matrix - also. On entry, Z contains the unitary matrix used - to reduce the original matrix to tridiagonal form. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. 
- - E (input/output) DOUBLE PRECISION array, dimension (N-1) - On entry, the subdiagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. - - Z (input/output) COMPLEX*16 array, dimension (LDZ,N) - On entry, if COMPZ = 'V', then Z contains the unitary - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original Hermitian matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1. - If eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. - If COMPZ = 'V' and N > 1, LWORK must be at least N*N. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - RWORK (workspace/output) DOUBLE PRECISION array, - dimension (LRWORK) - On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. - - LRWORK (input) INTEGER - The dimension of the array RWORK. - If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. - If COMPZ = 'V' and N > 1, LRWORK must be at least - 1 + 3*N + 2*N*lg N + 3*N**2 , - where lg( N ) = smallest integer k such - that 2**k >= N. - If COMPZ = 'I' and N > 1, LRWORK must be at least - 1 + 4*N + 2*N**2 . - - If LRWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the RWORK array, - returns this value as the first entry of the RWORK array, and - no error message related to LRWORK is issued by XERBLA. 
- - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - - LIWORK (input) INTEGER - The dimension of the array IWORK. - If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. - If COMPZ = 'V' or N > 1, LIWORK must be at least - 6 + 6*N + 5*N*lg N. - If COMPZ = 'I' or N > 1, LIWORK must be at least - 3 + 5*N . - - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - --rwork; - --iwork; - - /* Function Body */ - *info = 0; - lquery = ((*lwork == -1) || (*lrwork == -1)) || (*liwork == -1); - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if ((*n <= 1) || (icompz <= 0)) { - lwmin = 1; - liwmin = 1; - lrwmin = 1; - } else { - lgn = (integer) (log((doublereal) (*n)) / log(2.)); - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (icompz == 1) { - lwmin = *n * *n; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = *n * 3 + 1 + ((*n) << (1)) * lgn + i__1 * i__1 * 3; - liwmin = *n * 6 + 6 + *n * 5 * lgn; - } else if (icompz == 2) { - lwmin = 1; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = ((*n) << (2)) + 1 + ((i__1 * i__1) << (1)); - liwmin = *n * 5 + 3; - } - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) { - *info = -6; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*lrwork < lrwmin && ! lquery) { - *info = -10; - } else if (*liwork < liwmin && ! 
lquery) { - *info = -12; - } - - if (*info == 0) { - work[1].r = (doublereal) lwmin, work[1].i = 0.; - rwork[1] = (doublereal) lrwmin; - iwork[1] = liwmin; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZSTEDC", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*n == 1) { - if (icompz != 0) { - i__1 = z_dim1 + 1; - z__[i__1].r = 1., z__[i__1].i = 0.; - } - return 0; - } - - smlsiz = ilaenv_(&c__9, "ZSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - -/* - If the following conditional clause is removed, then the routine - will use the Divide and Conquer routine to compute only the - eigenvalues, which requires (3N + 3N**2) real workspace and - (2 + 5N + 2N lg(N)) integer workspace. - Since on many architectures DSTERF is much faster than any other - algorithm for finding eigenvalues only, it is used here - as the default. - - If COMPZ = 'N', use DSTERF to compute the eigenvalues. -*/ - - if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); - return 0; - } - -/* - If N is smaller than the minimum divide size (SMLSIZ+1), then - solve the problem with another solver. -*/ - - if (*n <= smlsiz) { - if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); - return 0; - } else if (icompz == 2) { - zsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; - } else { - zsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; - } - } - -/* If COMPZ = 'I', we simply call DSTEDC instead. 
*/ - - if (icompz == 2) { - dlaset_("Full", n, n, &c_b324, &c_b1015, &rwork[1], n); - ll = *n * *n + 1; - i__1 = *lrwork - ll + 1; - dstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & - iwork[1], liwork, info); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * z_dim1; - i__4 = (j - 1) * *n + i__; - z__[i__3].r = rwork[i__4], z__[i__3].i = 0.; -/* L10: */ - } -/* L20: */ - } - return 0; - } - -/* - From now on, only option left to be handled is COMPZ = 'V', - i.e. ICOMPZ = 1. - - Scale. -*/ - - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { - return 0; - } - - eps = EPSILON; - - start = 1; - -/* while ( START <= N ) */ - -L30: - if (start <= *n) { - -/* - Let END be the position of the next subdiagonal entry such that - E( END ) <= TINY or END = N if no such subdiagonal exists. The - matrix identified by the elements between START and END - constitutes an independent sub-problem. -*/ - - end = start; -L40: - if (end < *n) { - tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = - d__[end + 1], abs(d__2))); - if ((d__1 = e[end], abs(d__1)) > tiny) { - ++end; - goto L40; - } - } - -/* (Sub) Problem determined. Compute its size and solve it. */ - - m = end - start + 1; - if (m > smlsiz) { - *info = smlsiz; - -/* Scale. */ - - orgnrm = dlanst_("M", &m, &d__[start], &e[start]); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &m, &c__1, &d__[ - start], &m, info); - i__1 = m - 1; - i__2 = m - 1; - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &i__1, &c__1, &e[ - start], &i__2, info); - - zlaed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1], - ldz, &work[1], n, &rwork[1], &iwork[1], info); - if (*info > 0) { - *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m - + 1) + start - 1; - return 0; - } - -/* Scale back. 
*/ - - dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, &m, &c__1, &d__[ - start], &m, info); - - } else { - dsteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m * - m + 1], info); - zlacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & - work[1], n, &rwork[m * m + 1]); - zlacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz); - if (*info > 0) { - *info = start * (*n + 1) + end; - return 0; - } - } - - start = end + 1; - goto L30; - } - -/* - endwhile - - If the problem split any number of times, then the eigenvalues - will not be properly ordered. Here we permute the eigenvalues - (and the associated eigenvectors) into ascending order. -*/ - - if (m != *n) { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L50: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } -/* L60: */ - } - } - - work[1].r = (doublereal) lwmin, work[1].i = 0.; - rwork[1] = (doublereal) lrwmin; - iwork[1] = liwmin; - - return 0; - -/* End of ZSTEDC */ - -} /* zstedc_ */ - -/* Subroutine */ int zsteqr_(char *compz, integer *n, doublereal *d__, - doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, - integer *info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static doublereal b, c__, f, g; - static integer i__, j, k, l, m; - static doublereal p, r__, s; - static integer l1, ii, mm, lm1, mm1, nm1; - static doublereal rt1, rt2, eps; - static integer lsv; - static doublereal tst, eps2; - static integer lend, jtot; - extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal - *, 
doublereal *, doublereal *); - extern logical lsame_(char *, char *); - static doublereal anorm; - extern /* Subroutine */ int zlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublecomplex *, integer *), zswap_(integer *, doublecomplex *, - integer *, doublecomplex *, integer *), dlaev2_(doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *); - static integer lendm1, lendp1; - - static integer iscale; - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *); - static doublereal safmin; - extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *); - static doublereal safmax; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); - static integer lendsv; - static doublereal ssfmin; - static integer nmaxit, icompz; - static doublereal ssfmax; - extern /* Subroutine */ int zlaset_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, doublecomplex *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZSTEQR computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the implicit QL or QR method. - The eigenvectors of a full or band complex Hermitian matrix can also - be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this - matrix to tridiagonal form. - - Arguments - ========= - - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'V': Compute eigenvalues and eigenvectors of the original - Hermitian matrix. 
On entry, Z must contain the - unitary matrix used to reduce the original matrix - to tridiagonal form. - = 'I': Compute eigenvalues and eigenvectors of the - tridiagonal matrix. Z is initialized to the identity - matrix. - - N (input) INTEGER - The order of the matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) DOUBLE PRECISION array, dimension (N-1) - On entry, the (n-1) subdiagonal elements of the tridiagonal - matrix. - On exit, E has been destroyed. - - Z (input/output) COMPLEX*16 array, dimension (LDZ, N) - On entry, if COMPZ = 'V', then Z contains the unitary - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original Hermitian matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1, and if - eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) - If COMPZ = 'N', then WORK is not referenced. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm has failed to find all the eigenvalues in - a total of 30*N iterations; if INFO = i, then i - elements of E have not converged to zero; on exit, D - and E contain the elements of a symmetric tridiagonal - matrix which is unitarily similar to the original - matrix. - - ===================================================================== - - - Test the input parameters. 
-*/ - - /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - - /* Function Body */ - *info = 0; - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) { - *info = -6; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZSTEQR", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - - if (*n == 1) { - if (icompz == 2) { - i__1 = z_dim1 + 1; - z__[i__1].r = 1., z__[i__1].i = 0.; - } - return 0; - } - -/* Determine the unit roundoff and over/underflow thresholds. */ - - eps = EPSILON; -/* Computing 2nd power */ - d__1 = eps; - eps2 = d__1 * d__1; - safmin = SAFEMINIMUM; - safmax = 1. / safmin; - ssfmax = sqrt(safmax) / 3.; - ssfmin = sqrt(safmin) / eps2; - -/* - Compute the eigenvalues and eigenvectors of the tridiagonal - matrix. -*/ - - if (icompz == 2) { - zlaset_("Full", n, n, &c_b59, &c_b60, &z__[z_offset], ldz); - } - - nmaxit = *n * 30; - jtot = 0; - -/* - Determine where the matrix splits and choose QL or QR iteration - for each block, according to whether top or bottom diagonal - element is smaller. -*/ - - l1 = 1; - nm1 = *n - 1; - -L10: - if (l1 > *n) { - goto L160; - } - if (l1 > 1) { - e[l1 - 1] = 0.; - } - if (l1 <= nm1) { - i__1 = nm1; - for (m = l1; m <= i__1; ++m) { - tst = (d__1 = e[m], abs(d__1)); - if (tst == 0.) 
{ - goto L30; - } - if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m - + 1], abs(d__2))) * eps) { - e[m] = 0.; - goto L30; - } -/* L20: */ - } - } - m = *n; - -L30: - l = l1; - lsv = l; - lend = m; - lendsv = lend; - l1 = m + 1; - if (lend == l) { - goto L10; - } - -/* Scale submatrix in rows and columns L to LEND */ - - i__1 = lend - l + 1; - anorm = dlanst_("I", &i__1, &d__[l], &e[l]); - iscale = 0; - if (anorm == 0.) { - goto L10; - } - if (anorm > ssfmax) { - iscale = 1; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, - info); - } else if (anorm < ssfmin) { - iscale = 2; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, - info); - } - -/* Choose between QL and QR iteration */ - - if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { - lend = lsv; - l = lendsv; - } - - if (lend > l) { - -/* - QL Iteration - - Look for small subdiagonal element. -*/ - -L40: - if (l != lend) { - lendm1 = lend - 1; - i__1 = lendm1; - for (m = l; m <= i__1; ++m) { -/* Computing 2nd power */ - d__2 = (d__1 = e[m], abs(d__1)); - tst = d__2 * d__2; - if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m - + 1], abs(d__2)) + safmin) { - goto L60; - } -/* L50: */ - } - } - - m = lend; - -L60: - if (m < lend) { - e[m] = 0.; - } - p = d__[l]; - if (m == l) { - goto L80; - } - -/* - If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 - to compute its eigensystem. 
-*/ - - if (m == l + 1) { - if (icompz > 0) { - dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); - work[l] = c__; - work[*n - 1 + l] = s; - zlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & - z__[l * z_dim1 + 1], ldz); - } else { - dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); - } - d__[l] = rt1; - d__[l + 1] = rt2; - e[l] = 0.; - l += 2; - if (l <= lend) { - goto L40; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l + 1] - p) / (e[l] * 2.); - r__ = dlapy2_(&g, &c_b1015); - g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); - - s = 1.; - c__ = 1.; - p = 0.; - -/* Inner loop */ - - mm1 = m - 1; - i__1 = l; - for (i__ = mm1; i__ >= i__1; --i__) { - f = s * e[i__]; - b = c__ * e[i__]; - dlartg_(&g, &f, &c__, &s, &r__); - if (i__ != m - 1) { - e[i__ + 1] = r__; - } - g = d__[i__ + 1] - p; - r__ = (d__[i__] - g) * s + c__ * 2. * b; - p = s * r__; - d__[i__ + 1] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = -s; - } - -/* L70: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = m - l + 1; - zlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[l] = g; - goto L40; - -/* Eigenvalue found. */ - -L80: - d__[l] = p; - - ++l; - if (l <= lend) { - goto L40; - } - goto L140; - - } else { - -/* - QR Iteration - - Look for small superdiagonal element. 
-*/ - -L90: - if (l != lend) { - lendp1 = lend + 1; - i__1 = lendp1; - for (m = l; m >= i__1; --m) { -/* Computing 2nd power */ - d__2 = (d__1 = e[m - 1], abs(d__1)); - tst = d__2 * d__2; - if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m - - 1], abs(d__2)) + safmin) { - goto L110; - } -/* L100: */ - } - } - - m = lend; - -L110: - if (m > lend) { - e[m - 1] = 0.; - } - p = d__[l]; - if (m == l) { - goto L130; - } - -/* - If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 - to compute its eigensystem. -*/ - - if (m == l - 1) { - if (icompz > 0) { - dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) - ; - work[m] = c__; - work[*n - 1 + m] = s; - zlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & - z__[(l - 1) * z_dim1 + 1], ldz); - } else { - dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); - } - d__[l - 1] = rt1; - d__[l] = rt2; - e[l - 1] = 0.; - l += -2; - if (l >= lend) { - goto L90; - } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; - } - ++jtot; - -/* Form shift. */ - - g = (d__[l - 1] - p) / (e[l - 1] * 2.); - r__ = dlapy2_(&g, &c_b1015); - g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); - - s = 1.; - c__ = 1.; - p = 0.; - -/* Inner loop */ - - lm1 = l - 1; - i__1 = lm1; - for (i__ = m; i__ <= i__1; ++i__) { - f = s * e[i__]; - b = c__ * e[i__]; - dlartg_(&g, &f, &c__, &s, &r__); - if (i__ != m) { - e[i__ - 1] = r__; - } - g = d__[i__] - p; - r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; - p = s * r__; - d__[i__] = g + p; - g = c__ * r__ - b; - -/* If eigenvectors are desired, then save rotations. */ - - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = s; - } - -/* L120: */ - } - -/* If eigenvectors are desired, then apply saved rotations. */ - - if (icompz > 0) { - mm = l - m + 1; - zlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m - * z_dim1 + 1], ldz); - } - - d__[l] -= p; - e[lm1] = g; - goto L90; - -/* Eigenvalue found. 
*/ - -L130: - d__[l] = p; - - --l; - if (l >= lend) { - goto L90; - } - goto L140; - - } - -/* Undo scaling if necessary */ - -L140: - if (iscale == 1) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } else if (iscale == 2) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } - -/* - Check for no convergence to an eigenvalue after a total - of N*MAXIT iterations. -*/ - - if (jtot == nmaxit) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.) { - ++(*info); - } -/* L150: */ - } - return 0; - } - goto L10; - -/* Order eigenvalues and eigenvectors. */ - -L160: - if (icompz == 0) { - -/* Use Quick Sort */ - - dlasrt_("I", n, &d__[1], info); - - } else { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L170: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } -/* L180: */ - } - } - return 0; - -/* End of ZSTEQR */ - -} /* zsteqr_ */ - -/* Subroutine */ int ztrevc_(char *side, char *howmny, logical *select, - integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, - integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer - *m, doublecomplex *work, doublereal *rwork, integer *info) -{ - /* System generated locals */ - integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4, i__5; - doublereal d__1, d__2, d__3; - doublecomplex z__1, z__2; - - /* Builtin functions */ - double 
d_imag(doublecomplex *); - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer i__, j, k, ii, ki, is; - static doublereal ulp; - static logical allv; - static doublereal unfl, ovfl, smin; - static logical over; - static doublereal scale; - extern logical lsame_(char *, char *); - static doublereal remax; - static logical leftv, bothv; - extern /* Subroutine */ int zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *); - static logical somev; - extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); - - extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( - integer *, doublereal *, doublecomplex *, integer *); - extern integer izamax_(integer *, doublecomplex *, integer *); - static logical rightv; - extern doublereal dzasum_(integer *, doublecomplex *, integer *); - static doublereal smlnum; - extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublereal *, doublereal *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZTREVC computes some or all of the right and/or left eigenvectors of - a complex upper triangular matrix T. - - The right eigenvector x and the left eigenvector y of T corresponding - to an eigenvalue w are defined by: - - T*x = w*x, y'*T = w*y' - - where y' denotes the conjugate transpose of the vector y. - - If all eigenvectors are requested, the routine may either return the - matrices X and/or Y of right or left eigenvectors of T, or the - products Q*X and/or Q*Y, where Q is an input unitary - matrix. 
If T was obtained from the Schur factorization of an - original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of - right or left eigenvectors of A. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'R': compute right eigenvectors only; - = 'L': compute left eigenvectors only; - = 'B': compute both right and left eigenvectors. - - HOWMNY (input) CHARACTER*1 - = 'A': compute all right and/or left eigenvectors; - = 'B': compute all right and/or left eigenvectors, - and backtransform them using the input matrices - supplied in VR and/or VL; - = 'S': compute selected right and/or left eigenvectors, - specified by the logical array SELECT. - - SELECT (input) LOGICAL array, dimension (N) - If HOWMNY = 'S', SELECT specifies the eigenvectors to be - computed. - If HOWMNY = 'A' or 'B', SELECT is not referenced. - To select the eigenvector corresponding to the j-th - eigenvalue, SELECT(j) must be set to .TRUE.. - - N (input) INTEGER - The order of the matrix T. N >= 0. - - T (input/output) COMPLEX*16 array, dimension (LDT,N) - The upper triangular matrix T. T is modified, but restored - on exit. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= max(1,N). - - VL (input/output) COMPLEX*16 array, dimension (LDVL,MM) - On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must - contain an N-by-N matrix Q (usually the unitary matrix Q of - Schur vectors returned by ZHSEQR). - On exit, if SIDE = 'L' or 'B', VL contains: - if HOWMNY = 'A', the matrix Y of left eigenvectors of T; - VL is lower triangular. The i-th column - VL(i) of VL is the eigenvector corresponding - to T(i,i). - if HOWMNY = 'B', the matrix Q*Y; - if HOWMNY = 'S', the left eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VL, in the same order as their - eigenvalues. - If SIDE = 'R', VL is not referenced. - - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= max(1,N) if - SIDE = 'L' or 'B'; LDVL >= 1 otherwise. 
- - VR (input/output) COMPLEX*16 array, dimension (LDVR,MM) - On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must - contain an N-by-N matrix Q (usually the unitary matrix Q of - Schur vectors returned by ZHSEQR). - On exit, if SIDE = 'R' or 'B', VR contains: - if HOWMNY = 'A', the matrix X of right eigenvectors of T; - VR is upper triangular. The i-th column - VR(i) of VR is the eigenvector corresponding - to T(i,i). - if HOWMNY = 'B', the matrix Q*X; - if HOWMNY = 'S', the right eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VR, in the same order as their - eigenvalues. - If SIDE = 'L', VR is not referenced. - - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= max(1,N) if - SIDE = 'R' or 'B'; LDVR >= 1 otherwise. - - MM (input) INTEGER - The number of columns in the arrays VL and/or VR. MM >= M. - - M (output) INTEGER - The number of columns in the arrays VL and/or VR actually - used to store the eigenvectors. If HOWMNY = 'A' or 'B', M - is set to N. Each selected eigenvector occupies one - column. - - WORK (workspace) COMPLEX*16 array, dimension (2*N) - - RWORK (workspace) DOUBLE PRECISION array, dimension (N) - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - - Further Details - =============== - - The algorithm used in this program is basically backward (forward) - substitution, with scaling to make the code robust against - possible overflow. - - Each eigenvector is normalized so that the element of largest - magnitude has magnitude 1; here the magnitude of a complex number - (x,y) is taken to be |x| + |y|. 
- - ===================================================================== - - - Decode and test the input parameters -*/ - - /* Parameter adjustments */ - --select; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - --rwork; - - /* Function Body */ - bothv = lsame_(side, "B"); - rightv = (lsame_(side, "R")) || (bothv); - leftv = (lsame_(side, "L")) || (bothv); - - allv = lsame_(howmny, "A"); - over = lsame_(howmny, "B"); - somev = lsame_(howmny, "S"); + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; -/* - Set M to the number of columns required to store the selected - eigenvectors. -*/ +/* If eigenvectors are desired, then save rotations. */ - if (somev) { - *m = 0; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (select[j]) { - ++(*m); + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; } -/* L10: */ + +/* L70: */ } - } else { - *m = *n; - } - *info = 0; - if (! rightv && ! leftv) { - *info = -1; - } else if (! allv && ! over && ! somev) { - *info = -2; - } else if (*n < 0) { - *info = -4; - } else if (*ldt < max(1,*n)) { - *info = -6; - } else if ((*ldvl < 1) || (leftv && *ldvl < *n)) { - *info = -8; - } else if ((*ldvr < 1) || (rightv && *ldvr < *n)) { - *info = -10; - } else if (*mm < *m) { - *info = -11; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZTREVC", &i__1); - return 0; - } +/* If eigenvectors are desired, then apply saved rotations. */ -/* Quick return if possible. 
*/ + if (icompz > 0) { + mm = m - l + 1; + zlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz); + } - if (*n == 0) { - return 0; - } + d__[l] -= p; + e[l] = g; + goto L40; -/* Set the constants to control overflow. */ +/* Eigenvalue found. */ - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (*n / ulp); +L80: + d__[l] = p; -/* Store the diagonal elements of T in working array WORK. */ + ++l; + if (l <= lend) { + goto L40; + } + goto L140; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + *n; - i__3 = i__ + i__ * t_dim1; - work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i; -/* L20: */ - } + } else { /* - Compute 1-norm of each column of strictly upper triangular - part of T to control overflow in triangular solver. + QR Iteration + + Look for small superdiagonal element. */ - rwork[1] = 0.; - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - i__2 = j - 1; - rwork[j] = dzasum_(&i__2, &t[j * t_dim1 + 1], &c__1); -/* L30: */ - } +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m - 1], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + - 1], abs(d__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } - if (rightv) { + m = lend; -/* Compute right eigenvectors. */ +L110: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L130; + } - is = *m; - for (ki = *n; ki >= 1; --ki) { +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. +*/ - if (somev) { - if (! 
select[ki]) { - goto L80; - } + if (m == l - 1) { + if (icompz > 0) { + dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + zlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); } -/* Computing MAX */ - i__1 = ki + ki * t_dim1; - d__3 = ulp * ((d__1 = t[i__1].r, abs(d__1)) + (d__2 = d_imag(&t[ - ki + ki * t_dim1]), abs(d__2))); - smin = max(d__3,smlnum); + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } - work[1].r = 1., work[1].i = 0.; + if (jtot == nmaxit) { + goto L140; + } + ++jtot; -/* Form right-hand side. */ +/* Form shift. */ - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k; - i__3 = k + ki * t_dim1; - z__1.r = -t[i__3].r, z__1.i = -t[i__3].i; - work[i__2].r = z__1.r, work[i__2].i = z__1.i; -/* L40: */ - } + g = (d__[l - 1] - p) / (e[l - 1] * 2.); + r__ = dlapy2_(&g, &c_b1015); + g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); -/* - Solve the triangular system: - (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. 
-*/ + s = 1.; + c__ = 1.; + p = 0.; - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k + k * t_dim1; - i__3 = k + k * t_dim1; - i__4 = ki + ki * t_dim1; - z__1.r = t[i__3].r - t[i__4].r, z__1.i = t[i__3].i - t[i__4] - .i; - t[i__2].r = z__1.r, t[i__2].i = z__1.i; - i__2 = k + k * t_dim1; - if ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * - t_dim1]), abs(d__2)) < smin) { - i__3 = k + k * t_dim1; - t[i__3].r = smin, t[i__3].i = 0.; - } -/* L50: */ - } +/* Inner loop */ - if (ki > 1) { - i__1 = ki - 1; - zlatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[ - t_offset], ldt, &work[1], &scale, &rwork[1], info); - i__1 = ki; - work[i__1].r = scale, work[i__1].i = 0.; + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; -/* Copy the vector x or Q*x to VR and normalize. */ +/* If eigenvectors are desired, then save rotations. */ - if (! over) { - zcopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1); + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } - ii = izamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); - i__1 = ii + is * vr_dim1; - remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( - &vr[ii + is * vr_dim1]), abs(d__2))); - zdscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); +/* L120: */ + } - i__1 = *n; - for (k = ki + 1; k <= i__1; ++k) { - i__2 = k + is * vr_dim1; - vr[i__2].r = 0., vr[i__2].i = 0.; -/* L60: */ - } - } else { - if (ki > 1) { - i__1 = ki - 1; - z__1.r = scale, z__1.i = 0.; - zgemv_("N", n, &i__1, &c_b60, &vr[vr_offset], ldvr, &work[ - 1], &c__1, &z__1, &vr[ki * vr_dim1 + 1], &c__1); - } +/* If eigenvectors are desired, then apply saved rotations. 
*/ + + if (icompz > 0) { + mm = l - m + 1; + zlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz); + } - ii = izamax_(n, &vr[ki * vr_dim1 + 1], &c__1); - i__1 = ii + ki * vr_dim1; - remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( - &vr[ii + ki * vr_dim1]), abs(d__2))); - zdscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); - } + d__[l] -= p; + e[lm1] = g; + goto L90; -/* Set back the original diagonal elements of T. */ +/* Eigenvalue found. */ - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k + k * t_dim1; - i__3 = k + *n; - t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i; -/* L70: */ - } +L130: + d__[l] = p; - --is; -L80: - ; + --l; + if (l >= lend) { + goto L90; } + goto L140; + } - if (leftv) { +/* Undo scaling if necessary */ -/* Compute left eigenvectors. */ +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } - is = 1; - i__1 = *n; - for (ki = 1; ki <= i__1; ++ki) { +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ - if (somev) { - if (! select[ki]) { - goto L130; - } + if (jtot == nmaxit) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); } -/* Computing MAX */ - i__2 = ki + ki * t_dim1; - d__3 = ulp * ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[ - ki + ki * t_dim1]), abs(d__2))); - smin = max(d__3,smlnum); +/* L150: */ + } + return 0; + } + goto L10; - i__2 = *n; - work[i__2].r = 1., work[i__2].i = 0.; +/* Order eigenvalues and eigenvectors. */ -/* Form right-hand side. 
*/ +L160: + if (icompz == 0) { - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k; - d_cnjg(&z__2, &t[ki + k * t_dim1]); - z__1.r = -z__2.r, z__1.i = -z__2.i; - work[i__3].r = z__1.r, work[i__3].i = z__1.i; -/* L90: */ - } +/* Use Quick Sort */ -/* - Solve the triangular system: - (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. -*/ + dlasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k + k * t_dim1; - i__4 = k + k * t_dim1; - i__5 = ki + ki * t_dim1; - z__1.r = t[i__4].r - t[i__5].r, z__1.i = t[i__4].i - t[i__5] - .i; - t[i__3].r = z__1.r, t[i__3].i = z__1.i; - i__3 = k + k * t_dim1; - if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * - t_dim1]), abs(d__2)) < smin) { - i__4 = k + k * t_dim1; - t[i__4].r = smin, t[i__4].i = 0.; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; } -/* L100: */ +/* L170: */ } - - if (ki < *n) { - i__2 = *n - ki; - zlatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", & - i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki + - 1], &scale, &rwork[1], info); - i__2 = ki; - work[i__2].r = scale, work[i__2].i = 0.; + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); } +/* L180: */ + } + } + return 0; -/* Copy the vector x or Q*x to VL and normalize. */ +/* End of ZSTEQR */ - if (! over) { - i__2 = *n - ki + 1; - zcopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1) - ; +} /* zsteqr_ */ - i__2 = *n - ki + 1; - ii = izamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; - i__2 = ii + is * vl_dim1; - remax = 1. 
/ ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( - &vl[ii + is * vl_dim1]), abs(d__2))); - i__2 = *n - ki + 1; - zdscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); +/* Subroutine */ int ztrevc_(char *side, char *howmny, logical *select, + integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, + integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer + *m, doublecomplex *work, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3; + doublecomplex z__1, z__2; - i__2 = ki - 1; - for (k = 1; k <= i__2; ++k) { - i__3 = k + is * vl_dim1; - vl[i__3].r = 0., vl[i__3].i = 0.; -/* L110: */ - } - } else { - if (ki < *n) { - i__2 = *n - ki; - z__1.r = scale, z__1.i = 0.; - zgemv_("N", n, &i__2, &c_b60, &vl[(ki + 1) * vl_dim1 + 1], - ldvl, &work[ki + 1], &c__1, &z__1, &vl[ki * - vl_dim1 + 1], &c__1); - } + /* Builtin functions */ + double d_imag(doublecomplex *); + void d_cnjg(doublecomplex *, doublecomplex *); - ii = izamax_(n, &vl[ki * vl_dim1 + 1], &c__1); - i__2 = ii + ki * vl_dim1; - remax = 1. 
/ ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( - &vl[ii + ki * vl_dim1]), abs(d__2))); - zdscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - } + /* Local variables */ + static integer i__, j, k, ii, ki, is; + static doublereal ulp; + static logical allv; + static doublereal unfl, ovfl, smin; + static logical over; + static doublereal scale; + extern logical lsame_(char *, char *); + static doublereal remax; + static logical leftv, bothv; + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + static logical somev; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); -/* Set back the original diagonal elements of T. */ + extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + extern integer izamax_(integer *, doublecomplex *, integer *); + static logical rightv; + extern doublereal dzasum_(integer *, doublecomplex *, integer *); + static doublereal smlnum; + extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublereal *, doublereal *, integer *); - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k + k * t_dim1; - i__4 = k + *n; - t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i; -/* L120: */ - } - ++is; -L130: - ; - } - } +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 - return 0; -/* End of ZTREVC */ + Purpose + ======= -} /* ztrevc_ */ + ZTREVC computes some or all of the right and/or left eigenvectors of + a complex upper triangular matrix T. 
+ + The right eigenvector x and the left eigenvector y of T corresponding + to an eigenvalue w are defined by: + + T*x = w*x, y'*T = w*y' + + where y' denotes the conjugate transpose of the vector y. + + If all eigenvectors are requested, the routine may either return the + matrices X and/or Y of right or left eigenvectors of T, or the + products Q*X and/or Q*Y, where Q is an input unitary + matrix. If T was obtained from the Schur factorization of an + original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of + right or left eigenvectors of A. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'R': compute right eigenvectors only; + = 'L': compute left eigenvectors only; + = 'B': compute both right and left eigenvectors. -/* Subroutine */ int ztrti2_(char *uplo, char *diag, integer *n, - doublecomplex *a, integer *lda, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublecomplex z__1; + HOWMNY (input) CHARACTER*1 + = 'A': compute all right and/or left eigenvectors; + = 'B': compute all right and/or left eigenvectors, + and backtransform them using the input matrices + supplied in VR and/or VL; + = 'S': compute selected right and/or left eigenvectors, + specified by the logical array SELECT. - /* Builtin functions */ - void z_div(doublecomplex *, doublecomplex *, doublecomplex *); + SELECT (input) LOGICAL array, dimension (N) + If HOWMNY = 'S', SELECT specifies the eigenvectors to be + computed. + If HOWMNY = 'A' or 'B', SELECT is not referenced. + To select the eigenvector corresponding to the j-th + eigenvalue, SELECT(j) must be set to .TRUE.. 
- /* Local variables */ - static integer j; - static doublecomplex ajj; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *); - static logical upper; - extern /* Subroutine */ int ztrmv_(char *, char *, char *, integer *, - doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); - static logical nounit; + N (input) INTEGER + The order of the matrix T. N >= 0. + T (input/output) COMPLEX*16 array, dimension (LDT,N) + The upper triangular matrix T. T is modified, but restored + on exit. -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + VL (input/output) COMPLEX*16 array, dimension (LDVL,MM) + On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by ZHSEQR). + On exit, if SIDE = 'L' or 'B', VL contains: + if HOWMNY = 'A', the matrix Y of left eigenvectors of T; + VL is lower triangular. The i-th column + VL(i) of VL is the eigenvector corresponding + to T(i,i). + if HOWMNY = 'B', the matrix Q*Y; + if HOWMNY = 'S', the left eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VL, in the same order as their + eigenvalues. + If SIDE = 'R', VL is not referenced. - Purpose - ======= + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= max(1,N) if + SIDE = 'L' or 'B'; LDVL >= 1 otherwise. - ZTRTI2 computes the inverse of a complex upper or lower triangular - matrix. + VR (input/output) COMPLEX*16 array, dimension (LDVR,MM) + On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by ZHSEQR). 
+ On exit, if SIDE = 'R' or 'B', VR contains: + if HOWMNY = 'A', the matrix X of right eigenvectors of T; + VR is upper triangular. The i-th column + VR(i) of VR is the eigenvector corresponding + to T(i,i). + if HOWMNY = 'B', the matrix Q*X; + if HOWMNY = 'S', the right eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VR, in the same order as their + eigenvalues. + If SIDE = 'L', VR is not referenced. - This is the Level 2 BLAS version of the algorithm. + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= max(1,N) if + SIDE = 'R' or 'B'; LDVR >= 1 otherwise. - Arguments - ========= + MM (input) INTEGER + The number of columns in the arrays VL and/or VR. MM >= M. - UPLO (input) CHARACTER*1 - Specifies whether the matrix A is upper or lower triangular. - = 'U': Upper triangular - = 'L': Lower triangular + M (output) INTEGER + The number of columns in the arrays VL and/or VR actually + used to store the eigenvectors. If HOWMNY = 'A' or 'B', M + is set to N. Each selected eigenvector occupies one + column. - DIAG (input) CHARACTER*1 - Specifies whether or not the matrix A is unit triangular. - = 'N': Non-unit triangular - = 'U': Unit triangular + WORK (workspace) COMPLEX*16 array, dimension (2*N) - N (input) INTEGER - The order of the matrix A. N >= 0. + RWORK (workspace) DOUBLE PRECISION array, dimension (N) - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading n by n upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. 
+ INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value - On exit, the (triangular) inverse of the original matrix, in - the same storage format. + Further Details + =============== - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). + The algorithm used in this program is basically backward (forward) + substitution, with scaling to make the the code robust against + possible overflow. - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value + Each eigenvector is normalized so that the element of largest + magnitude has magnitude 1; here the magnitude of a complex number + (x,y) is taken to be |x| + |y|. ===================================================================== - Test the input parameters. + Decode and test the input parameters */ /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + --rwork; /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZTRTI2", &i__1); - return 0; - } + bothv = lsame_(side, "B"); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; - if (upper) { + allv = lsame_(howmny, "A"); + over = lsame_(howmny, "B"); + somev = lsame_(howmny, "S"); -/* Compute inverse of upper triangular matrix. */ +/* + Set M to the number of columns required to store the selected + eigenvectors. 
+*/ + if (somev) { + *m = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (nounit) { - i__2 = j + j * a_dim1; - z_div(&z__1, &c_b60, &a[j + j * a_dim1]); - a[i__2].r = z__1.r, a[i__2].i = z__1.i; - i__2 = j + j * a_dim1; - z__1.r = -a[i__2].r, z__1.i = -a[i__2].i; - ajj.r = z__1.r, ajj.i = z__1.i; - } else { - z__1.r = -1., z__1.i = -0.; - ajj.r = z__1.r, ajj.i = z__1.i; + if (select[j]) { + ++(*m); } - -/* Compute elements 1:j-1 of j-th column. */ - - i__2 = j - 1; - ztrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & - a[j * a_dim1 + 1], &c__1); - i__2 = j - 1; - zscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); /* L10: */ } } else { - -/* Compute inverse of lower triangular matrix. */ - - for (j = *n; j >= 1; --j) { - if (nounit) { - i__1 = j + j * a_dim1; - z_div(&z__1, &c_b60, &a[j + j * a_dim1]); - a[i__1].r = z__1.r, a[i__1].i = z__1.i; - i__1 = j + j * a_dim1; - z__1.r = -a[i__1].r, z__1.i = -a[i__1].i; - ajj.r = z__1.r, ajj.i = z__1.i; - } else { - z__1.r = -1., z__1.i = -0.; - ajj.r = z__1.r, ajj.i = z__1.i; - } - if (j < *n) { - -/* Compute elements j+1:n of j-th column. */ - - i__1 = *n - j; - ztrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + - 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); - i__1 = *n - j; - zscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); - } -/* L20: */ - } + *m = *n; } - return 0; + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! allv && ! over && ! somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else if (*mm < *m) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZTREVC", &i__1); + return 0; + } -/* End of ZTRTI2 */ +/* Quick return if possible. 
*/ -} /* ztrti2_ */ + if (*n == 0) { + return 0; + } -/* Subroutine */ int ztrtri_(char *uplo, char *diag, integer *n, - doublecomplex *a, integer *lda, integer *info) -{ - /* System generated locals */ - address a__1[2]; - integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; - doublecomplex z__1; - char ch__1[2]; +/* Set the constants to control overflow. */ - /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; + smlnum = unfl * (*n / ulp); - /* Local variables */ - static integer j, jb, nb, nn; - extern logical lsame_(char *, char *); - static logical upper; - extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, - integer *, integer *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *, integer *), - ztrsm_(char *, char *, char *, char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *), ztrti2_(char *, char * - , integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static logical nounit; +/* Store the diagonal elements of T in working array WORK. */ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + *n; + i__3 = i__ + i__ * t_dim1; + work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i; +/* L20: */ + } /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= + Compute 1-norm of each column of strictly upper triangular + part of T to control overflow in triangular solver. +*/ - ZTRTRI computes the inverse of a complex upper or lower triangular - matrix A. 
+ rwork[1] = 0.; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + rwork[j] = dzasum_(&i__2, &t[j * t_dim1 + 1], &c__1); +/* L30: */ + } - This is the Level 3 BLAS version of the algorithm. + if (rightv) { - Arguments - ========= +/* Compute right eigenvectors. */ - UPLO (input) CHARACTER*1 - = 'U': A is upper triangular; - = 'L': A is lower triangular. + is = *m; + for (ki = *n; ki >= 1; --ki) { - DIAG (input) CHARACTER*1 - = 'N': A is non-unit triangular; - = 'U': A is unit triangular. + if (somev) { + if (! select[ki]) { + goto L80; + } + } +/* Computing MAX */ + i__1 = ki + ki * t_dim1; + d__3 = ulp * ((d__1 = t[i__1].r, abs(d__1)) + (d__2 = d_imag(&t[ + ki + ki * t_dim1]), abs(d__2))); + smin = max(d__3,smlnum); - N (input) INTEGER - The order of the matrix A. N >= 0. + work[1].r = 1., work[1].i = 0.; - A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the triangular matrix A. If UPLO = 'U', the - leading N-by-N upper triangular part of the array A contains - the upper triangular matrix, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of the array A contains - the lower triangular matrix, and the strictly upper - triangular part of A is not referenced. If DIAG = 'U', the - diagonal elements of A are also not referenced and are - assumed to be 1. - On exit, the (triangular) inverse of the original matrix, in - the same storage format. +/* Form right-hand side. */ - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k; + i__3 = k + ki * t_dim1; + z__1.r = -t[i__3].r, z__1.i = -t[i__3].i; + work[i__2].r = z__1.r, work[i__2].i = z__1.i; +/* L40: */ + } - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, A(i,i) is exactly zero. The triangular - matrix is singular and its inverse can not be computed. 
+/* + Solve the triangular system: + (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. +*/ - ===================================================================== + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + k * t_dim1; + i__4 = ki + ki * t_dim1; + z__1.r = t[i__3].r - t[i__4].r, z__1.i = t[i__3].i - t[i__4] + .i; + t[i__2].r = z__1.r, t[i__2].i = z__1.i; + i__2 = k + k * t_dim1; + if ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * + t_dim1]), abs(d__2)) < smin) { + i__3 = k + k * t_dim1; + t[i__3].r = smin, t[i__3].i = 0.; + } +/* L50: */ + } + if (ki > 1) { + i__1 = ki - 1; + zlatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[ + t_offset], ldt, &work[1], &scale, &rwork[1], info); + i__1 = ki; + work[i__1].r = scale, work[i__1].i = 0.; + } - Test the input parameters. -*/ +/* Copy the vector x or Q*x to VR and normalize. */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; + if (! over) { + zcopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1); - /* Function Body */ - *info = 0; - upper = lsame_(uplo, "U"); - nounit = lsame_(diag, "N"); - if (! upper && ! lsame_(uplo, "L")) { - *info = -1; - } else if (! nounit && ! lsame_(diag, "U")) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZTRTRI", &i__1); - return 0; - } + ii = izamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + i__1 = ii + is * vr_dim1; + remax = 1. 
/ ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( + &vr[ii + is * vr_dim1]), abs(d__2))); + zdscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); -/* Quick return if possible */ + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + i__2 = k + is * vr_dim1; + vr[i__2].r = 0., vr[i__2].i = 0.; +/* L60: */ + } + } else { + if (ki > 1) { + i__1 = ki - 1; + z__1.r = scale, z__1.i = 0.; + zgemv_("N", n, &i__1, &c_b60, &vr[vr_offset], ldvr, &work[ + 1], &c__1, &z__1, &vr[ki * vr_dim1 + 1], &c__1); + } - if (*n == 0) { - return 0; - } + ii = izamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + i__1 = ii + ki * vr_dim1; + remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( + &vr[ii + ki * vr_dim1]), abs(d__2))); + zdscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } -/* Check for singularity if non-unit. */ +/* Set back the original diagonal elements of T. */ - if (nounit) { - i__1 = *n; - for (*info = 1; *info <= i__1; ++(*info)) { - i__2 = *info + *info * a_dim1; - if (a[i__2].r == 0. && a[i__2].i == 0.) { - return 0; + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + *n; + t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i; +/* L70: */ } -/* L10: */ + + --is; +L80: + ; } - *info = 0; } -/* - Determine the block size for this environment. - - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = uplo; - i__3[1] = 1, a__1[1] = diag; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - nb = ilaenv_(&c__1, "ZTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)2); - if ((nb <= 1) || (nb >= *n)) { - -/* Use unblocked code */ + if (leftv) { - ztrti2_(uplo, diag, n, &a[a_offset], lda, info); - } else { +/* Compute left eigenvectors. */ -/* Use blocked code */ + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { - if (upper) { + if (somev) { + if (! 
select[ki]) { + goto L130; + } + } +/* Computing MAX */ + i__2 = ki + ki * t_dim1; + d__3 = ulp * ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[ + ki + ki * t_dim1]), abs(d__2))); + smin = max(d__3,smlnum); -/* Compute inverse of upper triangular matrix */ + i__2 = *n; + work[i__2].r = 1., work[i__2].i = 0.; - i__1 = *n; - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { -/* Computing MIN */ - i__4 = nb, i__5 = *n - j + 1; - jb = min(i__4,i__5); +/* Form right-hand side. */ -/* Compute rows 1:j-1 of current block column */ + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k; + d_cnjg(&z__2, &t[ki + k * t_dim1]); + z__1.r = -z__2.r, z__1.i = -z__2.i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; +/* L90: */ + } - i__4 = j - 1; - ztrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & - c_b60, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); - i__4 = j - 1; - z__1.r = -1., z__1.i = -0.; - ztrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & - z__1, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], - lda); +/* + Solve the triangular system: + (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. 
+*/ -/* Compute inverse of current diagonal block */ + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + k * t_dim1; + i__5 = ki + ki * t_dim1; + z__1.r = t[i__4].r - t[i__5].r, z__1.i = t[i__4].i - t[i__5] + .i; + t[i__3].r = z__1.r, t[i__3].i = z__1.i; + i__3 = k + k * t_dim1; + if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * + t_dim1]), abs(d__2)) < smin) { + i__4 = k + k * t_dim1; + t[i__4].r = smin, t[i__4].i = 0.; + } +/* L100: */ + } - ztrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L20: */ + if (ki < *n) { + i__2 = *n - ki; + zlatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", & + i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki + + 1], &scale, &rwork[1], info); + i__2 = ki; + work[i__2].r = scale, work[i__2].i = 0.; } - } else { -/* Compute inverse of lower triangular matrix */ +/* Copy the vector x or Q*x to VL and normalize. */ - nn = (*n - 1) / nb * nb + 1; - i__2 = -nb; - for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) { -/* Computing MIN */ - i__1 = nb, i__4 = *n - j + 1; - jb = min(i__1,i__4); - if (j + jb <= *n) { + if (! over) { + i__2 = *n - ki + 1; + zcopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1) + ; -/* Compute rows j+jb:n of current block column */ + i__2 = *n - ki + 1; + ii = izamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; + i__2 = ii + is * vl_dim1; + remax = 1. 
/ ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( + &vl[ii + is * vl_dim1]), abs(d__2))); + i__2 = *n - ki + 1; + zdscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - i__1 = *n - j - jb + 1; - ztrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, - &c_b60, &a[j + jb + (j + jb) * a_dim1], lda, &a[j - + jb + j * a_dim1], lda); - i__1 = *n - j - jb + 1; - z__1.r = -1., z__1.i = -0.; - ztrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, - &z__1, &a[j + j * a_dim1], lda, &a[j + jb + j * - a_dim1], lda); + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + is * vl_dim1; + vl[i__3].r = 0., vl[i__3].i = 0.; +/* L110: */ + } + } else { + if (ki < *n) { + i__2 = *n - ki; + z__1.r = scale, z__1.i = 0.; + zgemv_("N", n, &i__2, &c_b60, &vl[(ki + 1) * vl_dim1 + 1], + ldvl, &work[ki + 1], &c__1, &z__1, &vl[ki * + vl_dim1 + 1], &c__1); } -/* Compute inverse of current diagonal block */ + ii = izamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + i__2 = ii + ki * vl_dim1; + remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( + &vl[ii + ki * vl_dim1]), abs(d__2))); + zdscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + } - ztrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); -/* L30: */ +/* Set back the original diagonal elements of T. 
*/ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + *n; + t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i; +/* L120: */ } + + ++is; +L130: + ; } } return 0; -/* End of ZTRTRI */ +/* End of ZTREVC */ -} /* ztrtri_ */ +} /* ztrevc_ */ /* Subroutine */ int zung2r_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * @@ -23570,9 +22570,9 @@ L130: *info = 0; if (*m < 0) { *info = -1; - } else if ((*n < 0) || (*n > *m)) { + } else if (*n < 0 || *n > *m) { *info = -2; - } else if ((*k < 0) || (*k > *n)) { + } else if (*k < 0 || *k > *n) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; @@ -23776,8 +22776,8 @@ L130: *info = -1; } else if (*m < 0) { *info = -2; - } else if (((*n < 0) || (wantq && ((*n > *m) || (*n < min(*m,*k))))) || (! - wantq && ((*m > *n) || (*m < min(*n,*k))))) { + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { *info = -3; } else if (*k < 0) { *info = -4; @@ -23809,7 +22809,7 @@ L130: /* Quick return if possible */ - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { work[1].r = 1., work[1].i = 0.; return 0; } @@ -23865,8 +22865,8 @@ L130: i__1 = *m - 1; i__2 = *m - 1; i__3 = *m - 1; - zungqr_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, & - tau[1], &work[1], lwork, &iinfo); + zungqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); } } } else { @@ -23920,8 +22920,8 @@ L130: i__1 = *n - 1; i__2 = *n - 1; i__3 = *n - 1; - zunglq_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, & - tau[1], &work[1], lwork, &iinfo); + zunglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); } } } @@ -24028,9 +23028,9 @@ L130: lquery = *lwork == -1; if (*n < 0) { *info = -1; - } else if ((*ilo < 1) || (*ilo > max(1,*n))) { + } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -2; - } else if ((*ihi < min(*ilo,*n)) || 
(*ihi > *n)) { + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; @@ -24216,7 +23216,7 @@ L130: *info = -1; } else if (*n < *m) { *info = -2; - } else if ((*k < 0) || (*k > *m)) { + } else if (*k < 0 || *k > *m) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; @@ -24407,7 +23407,7 @@ L130: *info = -1; } else if (*n < *m) { *info = -2; - } else if ((*k < 0) || (*k > *m)) { + } else if (*k < 0 || *k > *m) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; @@ -24670,9 +23670,9 @@ L130: lquery = *lwork == -1; if (*m < 0) { *info = -1; - } else if ((*n < 0) || (*n > *m)) { + } else if (*n < 0 || *n > *m) { *info = -2; - } else if ((*k < 0) || (*k > *n)) { + } else if (*k < 0 || *k > *n) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; @@ -24966,7 +23966,7 @@ L130: *info = -3; } else if (*n < 0) { *info = -4; - } else if ((*k < 0) || (*k > nq)) { + } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; @@ -24981,11 +23981,11 @@ L130: /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (*k == 0)) { + if (*m == 0 || *n == 0 || *k == 0) { return 0; } - if ((left && notran) || (! left && ! notran)) { + if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = 1; @@ -25181,7 +24181,7 @@ L130: *info = -3; } else if (*n < 0) { *info = -4; - } else if ((*k < 0) || (*k > nq)) { + } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; @@ -25196,11 +24196,11 @@ L130: /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (*k == 0)) { + if (*m == 0 || *n == 0 || *k == 0) { return 0; } - if ((left && ! notran) || (! left && notran)) { + if (left && ! notran || ! left && notran) { i1 = 1; i2 = *k; i3 = 1; @@ -25450,8 +24450,7 @@ L130: } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = min(nq,*k); - if ((applyq && *lda < max(1,nq)) || (! 
applyq && *lda < max(i__1,i__2) - )) { + if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { *info = -8; } else if (*ldc < max(1,*m)) { *info = -11; @@ -25516,7 +24515,7 @@ L130: /* Quick return if possible */ work[1].r = 1., work[1].i = 0.; - if ((*m == 0) || (*n == 0)) { + if (*m == 0 || *n == 0) { return 0; } @@ -25580,9 +24579,9 @@ L130: i2 = 2; } i__1 = nq - 1; - zunmlq_(side, transt, &mi, &ni, &i__1, &a[((a_dim1) << (1)) + 1], - lda, &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], - lwork, &iinfo); + zunmlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo); } } work[1].r = (doublereal) lwkopt, work[1].i = 0.; @@ -25732,7 +24731,7 @@ L130: *info = -3; } else if (*n < 0) { *info = -4; - } else if ((*k < 0) || (*k > nq)) { + } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,*k)) { *info = -7; @@ -25747,11 +24746,11 @@ L130: /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (*k == 0)) { + if (*m == 0 || *n == 0 || *k == 0) { return 0; } - if ((left && notran) || (! left && ! notran)) { + if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = 1; @@ -25987,7 +24986,7 @@ L130: *info = -3; } else if (*n < 0) { *info = -4; - } else if ((*k < 0) || (*k > nq)) { + } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,*k)) { *info = -7; @@ -26026,7 +25025,7 @@ L130: /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (*k == 0)) { + if (*m == 0 || *n == 0 || *k == 0) { work[1].r = 1., work[1].i = 0.; return 0; } @@ -26052,7 +25051,7 @@ L130: iws = nw; } - if ((nb < nbmin) || (nb >= *k)) { + if (nb < nbmin || nb >= *k) { /* Use unblocked code */ @@ -26062,7 +25061,7 @@ L130: /* Use blocked code */ - if ((left && notran) || (! left && ! notran)) { + if (left && notran || ! left && ! 
notran) { i1 = 1; i2 = *k; i3 = nb; @@ -26297,7 +25296,7 @@ L130: *info = -3; } else if (*n < 0) { *info = -4; - } else if ((*k < 0) || (*k > nq)) { + } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; @@ -26336,7 +25335,7 @@ L130: /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (*k == 0)) { + if (*m == 0 || *n == 0 || *k == 0) { work[1].r = 1., work[1].i = 0.; return 0; } @@ -26362,7 +25361,7 @@ L130: iws = nw; } - if ((nb < nbmin) || (nb >= *k)) { + if (nb < nbmin || nb >= *k) { /* Use unblocked code */ @@ -26372,7 +25371,7 @@ L130: /* Use blocked code */ - if ((left && notran) || (! left && ! notran)) { + if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = nb; @@ -26597,7 +25596,7 @@ L130: *info = -3; } else if (*n < 0) { *info = -4; - } else if ((*k < 0) || (*k > nq)) { + } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,nq)) { *info = -7; @@ -26636,7 +25635,7 @@ L130: /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (*k == 0)) { + if (*m == 0 || *n == 0 || *k == 0) { work[1].r = 1., work[1].i = 0.; return 0; } @@ -26662,7 +25661,7 @@ L130: iws = nw; } - if ((nb < nbmin) || (nb >= *k)) { + if (nb < nbmin || nb >= *k) { /* Use unblocked code */ @@ -26672,7 +25671,7 @@ L130: /* Use blocked code */ - if ((left && ! notran) || (! left && notran)) { + if (left && ! notran || ! 
left && notran) { i1 = 1; i2 = *k; i3 = nb; @@ -26962,7 +25961,7 @@ L130: /* Quick return if possible */ - if (((*m == 0) || (*n == 0)) || (nq == 1)) { + if (*m == 0 || *n == 0 || nq == 1) { work[1].r = 1., work[1].i = 0.; return 0; } @@ -26980,8 +25979,8 @@ L130: /* Q was determined by a call to ZHETRD with UPLO = 'U' */ i__2 = nq - 1; - zunmql_(side, trans, &mi, &ni, &i__2, &a[((a_dim1) << (1)) + 1], lda, - &tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); + zunmql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); } else { /* Q was determined by a call to ZHETRD with UPLO = 'L' */ @@ -27003,3 +26002,4 @@ L130: /* End of ZUNMTR */ } /* zunmtr_ */ + -- cgit v1.2.1 From b9731962a8d767abbe9d0c746e8ba734c01c866a Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Tue, 13 Dec 2016 21:01:32 +0000 Subject: BUG: Do not repeat xerbla_, which we deliberately replace This is also defined in python_xerbla.c, where we redefine a python-compatible version --- numpy/linalg/lapack_lite/blas_lite.c | 71 +++---------------------------- numpy/linalg/lapack_lite/wrapped_routines | 1 + 2 files changed, 8 insertions(+), 64 deletions(-) diff --git a/numpy/linalg/lapack_lite/blas_lite.c b/numpy/linalg/lapack_lite/blas_lite.c index 87322236c..98735a6ba 100644 --- a/numpy/linalg/lapack_lite/blas_lite.c +++ b/numpy/linalg/lapack_lite/blas_lite.c @@ -20,8 +20,7 @@ extern doublereal dlapy2_(doublereal *x, doublereal *y); /* Table of constant values */ -static integer c__1 = 1; -static doublecomplex c_b359 = {1.,0.}; +static doublecomplex c_b353 = {1.,0.}; /* Subroutine */ int daxpy_(integer *n, doublereal *da, doublereal *dx, integer *incx, doublereal *dy, integer *incy) @@ -4307,62 +4306,6 @@ logical lsame_(char *ca, char *cb) return ret_val; } /* lsame_ */ -/* Subroutine */ int xerbla_(char *srname, integer *info) -{ - /* Format strings */ - static char fmt_9999[] = "(\002 ** On entry to \002,a6,\002 parameter nu" - 
"mber \002,i2,\002 had \002,\002an illegal value\002)"; - - /* Builtin functions */ - integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); - /* Subroutine */ int s_stop(char *, ftnlen); - - /* Fortran I/O blocks */ - static cilist io___147 = { 0, 6, 0, fmt_9999, 0 }; - - -/* - -- LAPACK auxiliary routine (preliminary version) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 - - - Purpose - ======= - - XERBLA is an error handler for the LAPACK routines. - It is called by an LAPACK routine if an input parameter has an - invalid value. A message is printed and execution stops. - - Installers may consider modifying the STOP statement in order to - call system-specific exception-handling facilities. - - Arguments - ========= - - SRNAME (input) CHARACTER*6 - The name of the routine which called XERBLA. - - INFO (input) INTEGER - The position of the invalid parameter in the parameter list - of the calling routine. 
-*/ - - - s_wsfe(&io___147); - do_fio(&c__1, srname, (ftnlen)6); - do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); - e_wsfe(); - - s_stop("", (ftnlen)0); - - -/* End of XERBLA */ - - return 0; -} /* xerbla_ */ - /* Subroutine */ int zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy) { @@ -9946,7 +9889,7 @@ L20: /* L210: */ } if (nounit) { - z_div(&z__1, &c_b359, &a[j + j * a_dim1]); + z_div(&z__1, &c_b353, &a[j + j * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { @@ -9997,7 +9940,7 @@ L20: /* L260: */ } if (nounit) { - z_div(&z__1, &c_b359, &a[j + j * a_dim1]); + z_div(&z__1, &c_b353, &a[j + j * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { @@ -10024,11 +9967,11 @@ L20: for (k = *n; k >= 1; --k) { if (nounit) { if (noconj) { - z_div(&z__1, &c_b359, &a[k + k * a_dim1]); + z_div(&z__1, &c_b353, &a[k + k * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; } else { d_cnjg(&z__2, &a[k + k * a_dim1]); - z_div(&z__1, &c_b359, &z__2); + z_div(&z__1, &c_b353, &z__2); temp.r = z__1.r, temp.i = z__1.i; } i__1 = *m; @@ -10088,11 +10031,11 @@ L20: for (k = 1; k <= i__1; ++k) { if (nounit) { if (noconj) { - z_div(&z__1, &c_b359, &a[k + k * a_dim1]); + z_div(&z__1, &c_b353, &a[k + k * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; } else { d_cnjg(&z__2, &a[k + k * a_dim1]); - z_div(&z__1, &c_b359, &z__2); + z_div(&z__1, &c_b353, &z__2); temp.r = z__1.r, temp.i = z__1.i; } i__2 = *m; diff --git a/numpy/linalg/lapack_lite/wrapped_routines b/numpy/linalg/lapack_lite/wrapped_routines index 2045c12cd..b58361683 100644 --- a/numpy/linalg/lapack_lite/wrapped_routines +++ b/numpy/linalg/lapack_lite/wrapped_routines @@ -17,3 +17,4 @@ zgeqrf # need this b/c it's not properly declared as external in the BLAS source dcabs1 IGNORE: dlamch +IGNORE: xerbla -- cgit v1.2.1 From debb7a3f2b770e2a0f6356a00f93ebac97f7448b Mon Sep 17 00:00:00 2001 From: Eric 
Wieser Date: Wed, 14 Dec 2016 01:14:08 +0000 Subject: MAINT: Sort the functions, for ease of diffing --- numpy/linalg/lapack_lite/wrapped_routines | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/numpy/linalg/lapack_lite/wrapped_routines b/numpy/linalg/lapack_lite/wrapped_routines index b58361683..cc279f083 100644 --- a/numpy/linalg/lapack_lite/wrapped_routines +++ b/numpy/linalg/lapack_lite/wrapped_routines @@ -1,19 +1,19 @@ dgeev -zgeev -dsyevd -zheevd dgelsd -zgelsd +dgeqrf +dgesdd dgesv -zgesv dgetrf -zgetrf dpotrf -zpotrf -dgesdd -zgesdd -dgeqrf +dsyevd +zgeev +zgelsd zgeqrf +zgesdd +zgesv +zgetrf +zheevd +zpotrf # need this b/c it's not properly declared as external in the BLAS source dcabs1 IGNORE: dlamch -- cgit v1.2.1 From 224abf8274239474b58cac6d0526e0bfed7079d3 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Wed, 14 Dec 2016 01:16:40 +0000 Subject: BUG: add missing routines to the transpilation list These are taken from lapack_litemodule.c and umath_linalg.c.src --- numpy/linalg/lapack_lite/blas_lite.c | 13316 +++- numpy/linalg/lapack_lite/dlapack_lite.c | 95476 ++++++++++++++++++++++------ numpy/linalg/lapack_lite/wrapped_routines | 30 + numpy/linalg/lapack_lite/zlapack_lite.c | 3945 +- 4 files changed, 92159 insertions(+), 20608 deletions(-) diff --git a/numpy/linalg/lapack_lite/blas_lite.c b/numpy/linalg/lapack_lite/blas_lite.c index 98735a6ba..0991b2d21 100644 --- a/numpy/linalg/lapack_lite/blas_lite.c +++ b/numpy/linalg/lapack_lite/blas_lite.c @@ -20,35 +20,40 @@ extern doublereal dlapy2_(doublereal *x, doublereal *y); /* Table of constant values */ -static doublecomplex c_b353 = {1.,0.}; +static complex c_b21 = {1.f,0.f}; +static doublecomplex c_b1071 = {1.,0.}; -/* Subroutine */ int daxpy_(integer *n, doublereal *da, doublereal *dx, - integer *incx, doublereal *dy, integer *incy) +/* Subroutine */ int caxpy_(integer *n, complex *ca, complex *cx, integer * + incx, complex *cy, integer *incy) { /* System 
generated locals */ - integer i__1; + integer i__1, i__2, i__3, i__4; + real r__1, r__2; + complex q__1, q__2; + + /* Builtin functions */ + double r_imag(complex *); /* Local variables */ - static integer i__, m, ix, iy, mp1; + static integer i__, ix, iy; /* constant times a vector plus a vector. - uses unrolled loops for increments equal to one. jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) */ /* Parameter adjustments */ - --dy; - --dx; + --cy; + --cx; /* Function Body */ if (*n <= 0) { return 0; } - if (*da == 0.) { + if ((r__1 = ca->r, dabs(r__1)) + (r__2 = r_imag(ca), dabs(r__2)) == 0.f) { return 0; } if (*incx == 1 && *incy == 1) { @@ -70,86 +75,56 @@ static doublecomplex c_b353 = {1.,0.}; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - dy[iy] += *da * dx[ix]; + i__2 = iy; + i__3 = iy; + i__4 = ix; + q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[ + i__4].i + ca->i * cx[i__4].r; + q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; ix += *incx; iy += *incy; /* L10: */ } return 0; -/* - code for both increments equal to 1 - - - clean-up loop -*/ +/* code for both increments equal to 1 */ L20: - m = *n % 4; - if (m == 0) { - goto L40; - } - i__1 = m; + i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - dy[i__] += *da * dx[i__]; + i__2 = i__; + i__3 = i__; + i__4 = i__; + q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[ + i__4].i + ca->i * cx[i__4].r; + q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; /* L30: */ - } - if (*n < 4) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 4) { - dy[i__] += *da * dx[i__]; - dy[i__ + 1] += *da * dx[i__ + 1]; - dy[i__ + 2] += *da * dx[i__ + 2]; - dy[i__ + 3] += *da * dx[i__ + 3]; -/* L50: */ } return 0; -} /* daxpy_ */ - -doublereal dcabs1_(doublecomplex *z__) -{ - /* System generated 
locals */ - doublereal ret_val; - static doublecomplex equiv_0[1]; - - /* Local variables */ -#define t ((doublereal *)equiv_0) -#define zz (equiv_0) - - zz->r = z__->r, zz->i = z__->i; - ret_val = abs(t[0]) + abs(t[1]); - return ret_val; -} /* dcabs1_ */ - -#undef zz -#undef t - +} /* caxpy_ */ -/* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx, - doublereal *dy, integer *incy) +/* Subroutine */ int ccopy_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy) { /* System generated locals */ - integer i__1; + integer i__1, i__2, i__3; /* Local variables */ - static integer i__, m, ix, iy, mp1; + static integer i__, ix, iy; /* copies a vector, x, to a vector, y. - uses unrolled loops for increments equal to one. jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) */ /* Parameter adjustments */ - --dy; - --dx; + --cy; + --cx; /* Function Body */ if (*n <= 0) { @@ -174,78 +149,138 @@ doublereal dcabs1_(doublecomplex *z__) } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - dy[iy] = dx[ix]; + i__2 = iy; + i__3 = ix; + cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i; ix += *incx; iy += *incy; /* L10: */ } return 0; +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i; +/* L30: */ + } + return 0; +} /* ccopy_ */ + +/* Complex */ VOID cdotc_(complex * ret_val, integer *n, complex *cx, integer + *incx, complex *cy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, ix, iy; + static complex ctemp; + + /* - code for both increments equal to 1 + forms the dot product of two vectors, conjugating the first + vector. + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) +*/ - clean-up loop + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + ctemp.r = 0.f, ctemp.i = 0.f; + ret_val->r = 0.f, ret_val->i = 0.f; + if (*n <= 0) { + return ; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 */ -L20: - m = *n % 7; - if (m == 0) { - goto L40; + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - dy[i__] = dx[i__]; -/* L30: */ + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; } - if (*n < 7) { - return 0; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + r_cnjg(&q__3, &cx[ix]); + i__2 = iy; + q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * + cy[i__2].i + q__3.i * cy[i__2].r; + q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; + ix += *incx; + iy += *incy; +/* L10: */ } -L40: - mp1 = m + 1; + ret_val->r = ctemp.r, ret_val->i = ctemp.i; + return ; + +/* code for both increments equal to 1 */ + +L20: i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 7) { - dy[i__] = dx[i__]; - dy[i__ + 1] = dx[i__ + 1]; - dy[i__ + 2] = dx[i__ + 2]; - dy[i__ + 3] = dx[i__ + 3]; - dy[i__ + 4] = dx[i__ + 4]; - dy[i__ + 5] = dx[i__ + 5]; - dy[i__ + 6] = dx[i__ + 6]; -/* L50: */ + for (i__ = 1; i__ <= i__1; ++i__) { + r_cnjg(&q__3, &cx[i__]); + i__2 = i__; + q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * + cy[i__2].i + q__3.i * cy[i__2].r; + q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; +/* L30: */ } - return 0; -} /* dcopy_ */ + ret_val->r = ctemp.r, ret_val->i = ctemp.i; + return ; +} /* cdotc_ */ -doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, - integer *incy) +/* Complex */ VOID cdotu_(complex * ret_val, integer *n, complex *cx, integer + *incx, complex *cy, integer *incy) 
{ /* System generated locals */ - integer i__1; - doublereal ret_val; + integer i__1, i__2, i__3; + complex q__1, q__2; /* Local variables */ - static integer i__, m, ix, iy, mp1; - static doublereal dtemp; + static integer i__, ix, iy; + static complex ctemp; /* forms the dot product of two vectors. - uses unrolled loops for increments equal to one. jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) */ /* Parameter adjustments */ - --dy; - --dx; + --cy; + --cx; /* Function Body */ - ret_val = 0.; - dtemp = 0.; + ctemp.r = 0.f, ctemp.i = 0.f; + ret_val->r = 0.f, ret_val->i = 0.f; if (*n <= 0) { - return ret_val; + return ; } if (*incx == 1 && *incy == 1) { goto L20; @@ -266,61 +301,53 @@ doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - dtemp += dx[ix] * dy[iy]; + i__2 = ix; + i__3 = iy; + q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = + cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r; + q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; ix += *incx; iy += *incy; /* L10: */ } - ret_val = dtemp; - return ret_val; - -/* - code for both increments equal to 1 - + ret_val->r = ctemp.r, ret_val->i = ctemp.i; + return ; - clean-up loop -*/ +/* code for both increments equal to 1 */ L20: - m = *n % 5; - if (m == 0) { - goto L40; - } - i__1 = m; + i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - dtemp += dx[i__] * dy[i__]; + i__2 = i__; + i__3 = i__; + q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = + cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r; + q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; /* L30: */ } - if (*n < 5) { - goto L60; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 5) { - dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[ - i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * 
dy[i__ + 3] + dx[i__ + - 4] * dy[i__ + 4]; -/* L50: */ - } -L60: - ret_val = dtemp; - return ret_val; -} /* ddot_ */ + ret_val->r = ctemp.r, ret_val->i = ctemp.i; + return ; +} /* cdotu_ */ -/* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer * - n, integer *k, doublereal *alpha, doublereal *a, integer *lda, - doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, - integer *ldc) +/* Subroutine */ int cgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, + integer *ldb, complex *beta, complex *c__, integer *ldc) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3; + i__3, i__4, i__5, i__6; + complex q__1, q__2, q__3, q__4; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); /* Local variables */ static integer i__, j, l, info; static logical nota, notb; - static doublereal temp; + static complex temp; + static logical conja, conjb; static integer ncola; extern logical lsame_(char *, char *); static integer nrowa, nrowb; @@ -331,13 +358,13 @@ L60: Purpose ======= - DGEMM performs one of the matrix-matrix operations + CGEMM performs one of the matrix-matrix operations C := alpha*op( A )*op( B ) + beta*C, where op( X ) is one of - op( X ) = X or op( X ) = X', + op( X ) = X or op( X ) = X' or op( X ) = conjg( X' ), alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. @@ -353,7 +380,7 @@ L60: TRANSA = 'T' or 't', op( A ) = A'. - TRANSA = 'C' or 'c', op( A ) = A'. + TRANSA = 'C' or 'c', op( A ) = conjg( A' ). Unchanged on exit. @@ -365,7 +392,7 @@ L60: TRANSB = 'T' or 't', op( B ) = B'. - TRANSB = 'C' or 'c', op( B ) = B'. + TRANSB = 'C' or 'c', op( B ) = conjg( B' ). Unchanged on exit. @@ -386,11 +413,11 @@ L60: be at least zero. Unchanged on exit. - ALPHA - DOUBLE PRECISION. + ALPHA - COMPLEX . 
On entry, ALPHA specifies the scalar alpha. Unchanged on exit. - A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is + A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is k when TRANSA = 'N' or 'n', and is m otherwise. Before entry with TRANSA = 'N' or 'n', the leading m by k part of the array A must contain the matrix A, otherwise @@ -405,7 +432,7 @@ L60: least max( 1, k ). Unchanged on exit. - B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is + B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is n when TRANSB = 'N' or 'n', and is k otherwise. Before entry with TRANSB = 'N' or 'n', the leading k by n part of the array B must contain the matrix B, otherwise @@ -420,12 +447,12 @@ L60: least max( 1, n ). Unchanged on exit. - BETA - DOUBLE PRECISION. + BETA - COMPLEX . On entry, BETA specifies the scalar beta. When BETA is supplied as zero then C need not be set on input. Unchanged on exit. - C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). + C - COMPLEX array of DIMENSION ( LDC, n ). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. @@ -449,8 +476,10 @@ L60: Set NOTA and NOTB as true if A and B respectively are not - transposed and set NROWA, NCOLA and NROWB as the number of rows - and columns of A and the number of rows of B respectively. + conjugated or transposed, set CONJA and CONJB as true if A and + B respectively are to be transposed but not conjugated and set + NROWA, NCOLA and NROWB as the number of rows and columns of A + and the number of rows of B respectively. */ /* Parameter adjustments */ @@ -467,6 +496,8 @@ L60: /* Function Body */ nota = lsame_(transa, "N"); notb = lsame_(transb, "N"); + conja = lsame_(transa, "C"); + conjb = lsame_(transb, "C"); if (nota) { nrowa = *m; ncola = *k; @@ -483,11 +514,9 @@ L60: /* Test the input parameters. */ info = 0; - if (! nota && ! lsame_(transa, "C") && ! 
lsame_( - transa, "T")) { + if (! nota && ! conja && ! lsame_(transa, "T")) { info = 1; - } else if (! notb && ! lsame_(transb, "C") && ! - lsame_(transb, "T")) { + } else if (! notb && ! conjb && ! lsame_(transb, "T")) { info = 2; } else if (*m < 0) { info = 3; @@ -503,25 +532,27 @@ L60: info = 13; } if (info != 0) { - xerbla_("DGEMM ", &info); + xerbla_("CGEMM ", &info); return 0; } /* Quick return if possible. */ - if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + if (*m == 0 || *n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) + && (beta->r == 1.f && beta->i == 0.f)) { return 0; } -/* And if alpha.eq.zero. */ +/* And when alpha.eq.zero. */ - if (*alpha == 0.) { - if (*beta == 0.) { + if (alpha->r == 0.f && alpha->i == 0.f) { + if (beta->r == 0.f && beta->i == 0.f) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; /* L10: */ } /* L20: */ @@ -531,7 +562,12 @@ L60: for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, + q__1.i = beta->r * c__[i__4].i + beta->i * c__[ + i__4].r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; /* L30: */ } /* L40: */ @@ -549,27 +585,45 @@ L60: i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { + if (beta->r == 0.f && beta->i == 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; /* L50: */ } - } else if (*beta != 1.) 
{ + } else if (beta->r != 1.f || beta->i != 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; /* L60: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { - if (b[l + j * b_dim1] != 0.) { - temp = *alpha * b[l + j * b_dim1]; + i__3 = l + j * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + i__3 = l + j * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + q__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = q__1.r, temp.i = q__1.i; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; /* L70: */ } } @@ -577,65 +631,324 @@ L60: } /* L90: */ } - } else { + } else if (conja) { -/* Form C := alpha*A'*B + beta*C */ +/* Form C := alpha*conjg( A' )*B + beta*C. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.; + temp.r = 0.f, temp.i = 0.f; i__3 = *k; for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, + q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; /* L100: */ } - if (*beta == 0.) 
{ - c__[i__ + j * c_dim1] = *alpha * temp; + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; } /* L110: */ } /* L120: */ } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + i__5 = l + j * b_dim1; + q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] + .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4] + .i * b[i__5].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L130: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L140: */ + } +/* L150: */ + } } - } else { - if (nota) { + } else if (nota) { + if (conjb) { -/* Form C := alpha*A*B' + beta*C */ +/* 
Form C := alpha*A*conjg( B' ) + beta*C. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { + if (beta->r == 0.f && beta->i == 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; -/* L130: */ + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L160: */ } - } else if (*beta != 1.) { + } else if (beta->r != 1.f || beta->i != 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; -/* L140: */ + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L170: */ } } i__2 = *k; for (l = 1; l <= i__2; ++l) { - if (b[j + l * b_dim1] != 0.) { - temp = *alpha * b[j + l * b_dim1]; + i__3 = j + l * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + r_cnjg(&q__2, &b[j + l * b_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, + q__1.i = alpha->r * q__2.i + alpha->i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; -/* L150: */ + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L180: */ } } -/* L160: */ +/* L190: */ } -/* L170: */ +/* L200: */ + } + } else { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (beta->r == 0.f && beta->i == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; +/* L210: */ + } + } else if (beta->r != 1.f || beta->i != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; 
+ i__4 = i__ + j * c_dim1; + q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__1.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L220: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + i__3 = j + l * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + i__3 = j + l * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + q__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; +/* L230: */ + } + } +/* L240: */ + } +/* L250: */ + } + } + } else if (conja) { + if (conjb) { + +/* Form C := alpha*conjg( A' )*conjg( B' ) + beta*C. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + r_cnjg(&q__4, &b[j + l * b_dim1]); + q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = + q__3.r * q__4.i + q__3.i * q__4.r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L260: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L270: */ + } +/* L280: */ + } + } else { + +/* Form C := alpha*conjg( A' )*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = j + l * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, + q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L290: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = 
beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L300: */ + } +/* L310: */ + } + } + } else { + if (conjb) { + +/* Form C := alpha*A'*conjg( B' ) + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + i__4 = l + i__ * a_dim1; + r_cnjg(&q__3, &b[j + l * b_dim1]); + q__2.r = a[i__4].r * q__3.r - a[i__4].i * q__3.i, + q__2.i = a[i__4].r * q__3.i + a[i__4].i * + q__3.r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L320: */ + } + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } +/* L330: */ + } +/* L340: */ } } else { @@ -645,53 +958,78 @@ L60: for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.; + temp.r = 0.f, temp.i = 0.f; i__3 = *k; for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; -/* L180: */ + i__4 = l + i__ * a_dim1; + i__5 = j + l * b_dim1; + q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5] + .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4] + .i * b[i__5].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L350: */ } - if (*beta == 0.) 
{ - c__[i__ + j * c_dim1] = *alpha * temp; + if (beta->r == 0.f && beta->i == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, + q__2.i = alpha->r * temp.i + alpha->i * + temp.r; + i__4 = i__ + j * c_dim1; + q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4] + .i, q__3.i = beta->r * c__[i__4].i + beta->i * + c__[i__4].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; } -/* L190: */ +/* L360: */ } -/* L200: */ +/* L370: */ } } } return 0; -/* End of DGEMM . */ +/* End of CGEMM . */ -} /* dgemm_ */ +} /* cgemm_ */ -/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal * - alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, - doublereal *beta, doublereal *y, integer *incy) +/* Subroutine */ int cgemv_(char *trans, integer *m, integer *n, complex * + alpha, complex *a, integer *lda, complex *x, integer *incx, complex * + beta, complex *y, integer *incy) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); /* Local variables */ static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static doublereal temp; + static complex temp; static integer lenx, leny; extern logical lsame_(char *, char *); extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj; /* Purpose ======= - DGEMV performs one of the matrix-vector operations + CGEMV performs one of the matrix-vector operations - y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or + + y := 
alpha*conjg( A' )*x + beta*y, where alpha and beta are scalars, x and y are vectors and A is an m by n matrix. @@ -707,7 +1045,7 @@ L60: TRANS = 'T' or 't' y := alpha*A'*x + beta*y. - TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. + TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. Unchanged on exit. @@ -721,11 +1059,11 @@ L60: N must be at least zero. Unchanged on exit. - ALPHA - DOUBLE PRECISION. + ALPHA - COMPLEX . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + A - COMPLEX array of DIMENSION ( LDA, n ). Before entry, the leading m by n part of the array A must contain the matrix of coefficients. Unchanged on exit. @@ -736,7 +1074,7 @@ L60: max( 1, m ). Unchanged on exit. - X - DOUBLE PRECISION array of DIMENSION at least + X - COMPLEX array of DIMENSION at least ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. @@ -749,12 +1087,12 @@ L60: X. INCX must not be zero. Unchanged on exit. - BETA - DOUBLE PRECISION. + BETA - COMPLEX . On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. Unchanged on exit. - Y - DOUBLE PRECISION array of DIMENSION at least + Y - COMPLEX array of DIMENSION at least ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. @@ -804,16 +1142,19 @@ L60: info = 11; } if (info != 0) { - xerbla_("DGEMV ", &info); + xerbla_("CGEMV ", &info); return 0; } /* Quick return if possible. */ - if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { + if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r + == 1.f && beta->i == 0.f)) { return 0; } + noconj = lsame_(trans, "T"); + /* Set LENX and LENY, the lengths of the vectors x and y, and set up the start points in X and Y. @@ -844,41 +1185,53 @@ L60: First form y := beta*y. */ - if (*beta != 1.) 
{ + if (beta->r != 1.f || beta->i != 0.f) { if (*incy == 1) { - if (*beta == 0.) { + if (beta->r == 0.f && beta->i == 0.f) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.; + i__2 = i__; + y[i__2].r = 0.f, y[i__2].i = 0.f; /* L10: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; + i__2 = i__; + i__3 = i__; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; /* L20: */ } } } else { iy = ky; - if (*beta == 0.) { + if (beta->r == 0.f && beta->i == 0.f) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.; + i__2 = iy; + y[i__2].r = 0.f, y[i__2].i = 0.f; iy += *incy; /* L30: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; + i__2 = iy; + i__3 = iy; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; iy += *incy; /* L40: */ } } } } - if (*alpha == 0.) { + if (alpha->r == 0.f && alpha->i == 0.f) { return 0; } if (lsame_(trans, "N")) { @@ -889,11 +1242,24 @@ L60: if (*incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.) { - temp = *alpha * x[jx]; + i__2 = jx; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + temp.r = q__1.r, temp.i = q__1.i; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp * a[i__ + j * a_dim1]; + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; /* L50: */ } } @@ -903,12 +1269,25 @@ L60: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.) 
{ - temp = *alpha * x[jx]; + i__2 = jx; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + temp.r = q__1.r, temp.i = q__1.i; iy = ky; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - y[iy] += temp * a[i__ + j * a_dim1]; + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; iy += *incy; /* L70: */ } @@ -919,56 +1298,110 @@ L60: } } else { -/* Form y := alpha*A'*x + y. */ +/* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. */ jy = ky; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - temp = 0.; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[i__]; + temp.r = 0.f, temp.i = 0.f; + if (noconj) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] + .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3] + .i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; /* L90: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L100: */ + } } - y[jy] += *alpha * temp; + i__2 = jy; + i__3 = jy; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = + alpha->r * temp.i + alpha->i * temp.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; jy += *incy; -/* L100: */ +/* L110: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - temp = 0.; + 
temp.r = 0.f, temp.i = 0.f; ix = kx; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[ix]; - ix += *incx; -/* L110: */ + if (noconj) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4] + .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3] + .i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L120: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L130: */ + } } - y[jy] += *alpha * temp; + i__2 = jy; + i__3 = jy; + q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = + alpha->r * temp.i + alpha->i * temp.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; jy += *incy; -/* L120: */ +/* L140: */ } } } return 0; -/* End of DGEMV . */ +/* End of CGEMV . 
*/ -} /* dgemv_ */ +} /* cgemv_ */ -/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, - doublereal *x, integer *incx, doublereal *y, integer *incy, - doublereal *a, integer *lda) +/* Subroutine */ int cgerc_(integer *m, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); /* Local variables */ static integer i__, j, ix, jy, kx, info; - static doublereal temp; + static complex temp; extern /* Subroutine */ int xerbla_(char *, integer *); @@ -976,9 +1409,9 @@ L60: Purpose ======= - DGER performs the rank 1 operation + CGERC performs the rank 1 operation - A := alpha*x*y' + A, + A := alpha*x*conjg( y' ) + A, where alpha is a scalar, x is an m element vector, y is an n element vector and A is an m by n matrix. @@ -996,11 +1429,11 @@ L60: N must be at least zero. Unchanged on exit. - ALPHA - DOUBLE PRECISION. + ALPHA - COMPLEX . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. - X - DOUBLE PRECISION array of dimension at least + X - COMPLEX array of dimension at least ( 1 + ( m - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the m element vector x. @@ -1011,7 +1444,7 @@ L60: X. INCX must not be zero. Unchanged on exit. - Y - DOUBLE PRECISION array of dimension at least + Y - COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. @@ -1022,7 +1455,7 @@ L60: Y. INCY must not be zero. Unchanged on exit. - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + A - COMPLEX array of DIMENSION ( LDA, n ). Before entry, the leading m by n part of the array A must contain the matrix of coefficients. On exit, A is overwritten by the updated matrix. 
@@ -1067,13 +1500,13 @@ L60: info = 9; } if (info != 0) { - xerbla_("DGER ", &info); + xerbla_("CGERC ", &info); return 0; } /* Quick return if possible. */ - if (*m == 0 || *n == 0 || *alpha == 0.) { + if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) { return 0; } @@ -1090,11 +1523,21 @@ L60: if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.) { - temp = *alpha * y[jy]; + i__2 = jy; + if (y[i__2].r != 0.f || y[i__2].i != 0.f) { + r_cnjg(&q__2, &y[jy]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[i__] * temp; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; /* L10: */ } } @@ -1109,12 +1552,22 @@ L60: } i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (y[jy] != 0.) { - temp = *alpha * y[jy]; + i__2 = jy; + if (y[i__2].r != 0.f || y[i__2].i != 0.f) { + r_cnjg(&q__2, &y[jy]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; ix = kx; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] += x[ix] * temp; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; ix += *incx; /* L30: */ } @@ -1126,407 +1579,296 @@ L60: return 0; -/* End of DGER . */ +/* End of CGERC . 
*/ -} /* dger_ */ +} /* cgerc_ */ -doublereal dnrm2_(integer *n, doublereal *x, integer *incx) +/* Subroutine */ int cgeru_(integer *m, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) { /* System generated locals */ - integer i__1, i__2; - doublereal ret_val, d__1; - - /* Builtin functions */ - double sqrt(doublereal); + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2; /* Local variables */ - static integer ix; - static doublereal ssq, norm, scale, absxi; + static integer i__, j, ix, jy, kx, info; + static complex temp; + extern /* Subroutine */ int xerbla_(char *, integer *); /* - DNRM2 returns the euclidean norm of a vector via the function - name, so that - - DNRM2 := sqrt( x'*x ) + Purpose + ======= + CGERU performs the rank 1 operation - -- This version written on 25-October-1982. - Modified on 14-October-1993 to inline the call to DLASSQ. - Sven Hammarling, Nag Ltd. -*/ + A := alpha*x*y' + A, + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. - /* Parameter adjustments */ - --x; + Parameters + ========== - /* Function Body */ - if (*n < 1 || *incx < 1) { - norm = 0.; - } else if (*n == 1) { - norm = abs(x[1]); - } else { - scale = 0.; - ssq = 1.; -/* - The following loop is equivalent to this call to the LAPACK - auxiliary routine: - CALL DLASSQ( N, X, INCX, SCALE, SSQ ) -*/ + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - if (x[ix] != 0.) 
{ - absxi = (d__1 = x[ix], abs(d__1)); - if (scale < absxi) { -/* Computing 2nd power */ - d__1 = scale / absxi; - ssq = ssq * (d__1 * d__1) + 1.; - scale = absxi; - } else { -/* Computing 2nd power */ - d__1 = absxi / scale; - ssq += d__1 * d__1; - } - } -/* L10: */ - } - norm = scale * sqrt(ssq); - } + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. - ret_val = norm; - return ret_val; + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. -/* End of DNRM2. */ + X - COMPLEX array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. -} /* dnrm2_ */ + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. -/* Subroutine */ int drot_(integer *n, doublereal *dx, integer *incx, - doublereal *dy, integer *incy, doublereal *c__, doublereal *s) -{ - /* System generated locals */ - integer i__1; + Y - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. - /* Local variables */ - static integer i__, ix, iy; - static doublereal dtemp; + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. -/* - applies a plane rotation. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. 
- /* Parameter adjustments */ - --dy; - --dx; + Level 2 Blas routine. - /* Function Body */ - if (*n <= 0) { + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("CGERU ", &info); return 0; } - if (*incx == 1 && *incy == 1) { - goto L20; + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) { + return 0; } /* - code for unequal increments or equal increments not equal - to 1 + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. 
*/ - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = *c__ * dx[ix] + *s * dy[iy]; - dy[iy] = *c__ * dy[iy] - *s * dx[ix]; - dx[ix] = dtemp; - ix += *incx; - iy += *incy; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0.f || y[i__2].i != 0.f) { + i__2 = jy; + q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i = + alpha->r * y[i__2].i + alpha->i * y[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; /* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jy; + if (y[i__2].r != 0.f || y[i__2].i != 0.f) { + i__2 = jy; + q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i = + alpha->r * y[i__2].i + alpha->i * y[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i = + x[i__5].r * temp.i + x[i__5].i * temp.r; + q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } } + return 0; -/* code for both increments equal to 1 */ +/* End of CGERU . 
*/ -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = *c__ * dx[i__] + *s * dy[i__]; - dy[i__] = *c__ * dy[i__] - *s * dx[i__]; - dx[i__] = dtemp; -/* L30: */ - } - return 0; -} /* drot_ */ +} /* cgeru_ */ -/* Subroutine */ int dscal_(integer *n, doublereal *da, doublereal *dx, - integer *incx) +/* Subroutine */ int chemv_(char *uplo, integer *n, complex *alpha, complex * + a, integer *lda, complex *x, integer *incx, complex *beta, complex *y, + integer *incy) { /* System generated locals */ - integer i__1, i__2; + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); /* Local variables */ - static integer i__, m, mp1, nincx; + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static complex temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); /* - scales a vector by a constant. - uses unrolled loops for increment equal to one. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ + Purpose + ======= + CHEMV performs the matrix-vector operation - /* Parameter adjustments */ - --dx; + y := alpha*A*x + beta*y, - /* Function Body */ - if (*n <= 0 || *incx <= 0) { - return 0; - } - if (*incx == 1) { - goto L20; - } + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n hermitian matrix. -/* code for increment not equal to 1 */ + Parameters + ========== - nincx = *n * *incx; - i__1 = nincx; - i__2 = *incx; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - dx[i__] = *da * dx[i__]; -/* L10: */ - } - return 0; + UPLO - CHARACTER*1. 
+ On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: -/* - code for increment equal to 1 + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. - clean-up loop -*/ + Unchanged on exit. -L20: - m = *n % 5; - if (m == 0) { - goto L40; - } - i__2 = m; - for (i__ = 1; i__ <= i__2; ++i__) { - dx[i__] = *da * dx[i__]; -/* L30: */ - } - if (*n < 5) { - return 0; - } -L40: - mp1 = m + 1; - i__2 = *n; - for (i__ = mp1; i__ <= i__2; i__ += 5) { - dx[i__] = *da * dx[i__]; - dx[i__ + 1] = *da * dx[i__ + 1]; - dx[i__ + 2] = *da * dx[i__ + 2]; - dx[i__ + 3] = *da * dx[i__ + 3]; - dx[i__ + 4] = *da * dx[i__ + 4]; -/* L50: */ - } - return 0; -} /* dscal_ */ + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. -/* Subroutine */ int dswap_(integer *n, doublereal *dx, integer *incx, - doublereal *dy, integer *incy) -{ - /* System generated locals */ - integer i__1; + ALPHA - COMPLEX . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. - /* Local variables */ - static integer i__, m, ix, iy, mp1; - static doublereal dtemp; + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the hermitian matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the hermitian matrix and the strictly + upper triangular part of A is not referenced. + Note that the imaginary parts of the diagonal elements need + not be set and are assumed to be zero. + Unchanged on exit. + LDA - INTEGER. 
+ On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. -/* - interchanges two vectors. - uses unrolled loops for increments equal one. - jack dongarra, linpack, 3/11/78. - modified 12/3/93, array(1) declarations changed to array(*) -*/ + X - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. - /* Parameter adjustments */ - --dy; - --dx; + BETA - COMPLEX . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = dx[ix]; - dx[ix] = dy[iy]; - dy[iy] = dtemp; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* - code for both increments equal to 1 - - - clean-up loop -*/ - -L20: - m = *n % 3; - if (m == 0) { - goto L40; - } - i__1 = m; - for (i__ = 1; i__ <= i__1; ++i__) { - dtemp = dx[i__]; - dx[i__] = dy[i__]; - dy[i__] = dtemp; -/* L30: */ - } - if (*n < 3) { - return 0; - } -L40: - mp1 = m + 1; - i__1 = *n; - for (i__ = mp1; i__ <= i__1; i__ += 3) { - dtemp = dx[i__]; - dx[i__] = dy[i__]; - dy[i__] = dtemp; - dtemp = dx[i__ + 1]; - dx[i__ + 1] = dy[i__ + 1]; - dy[i__ + 1] = dtemp; - dtemp = dx[i__ + 2]; - dx[i__ + 2] = dy[i__ + 2]; - dy[i__ + 2] = dtemp; -/* L50: */ - } - return 0; -} /* dswap_ */ - -/* Subroutine */ int dsymv_(char *uplo, integer *n, doublereal *alpha, - 
doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal - *beta, doublereal *y, integer *incy) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - - /* Local variables */ - static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static doublereal temp1, temp2; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); - - -/* - Purpose - ======= - - DSYMV performs the matrix-vector operation - - y := alpha*A*x + beta*y, - - where alpha and beta are scalars, x and y are n element vectors and - A is an n by n symmetric matrix. - - Parameters - ========== - - UPLO - CHARACTER*1. - On entry, UPLO specifies whether the upper or lower - triangular part of the array A is to be referenced as - follows: - - UPLO = 'U' or 'u' Only the upper triangular part of A - is to be referenced. - - UPLO = 'L' or 'l' Only the lower triangular part of A - is to be referenced. - - Unchanged on exit. - - N - INTEGER. - On entry, N specifies the order of the matrix A. - N must be at least zero. - Unchanged on exit. - - ALPHA - DOUBLE PRECISION. - On entry, ALPHA specifies the scalar alpha. - Unchanged on exit. - - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). - Before entry with UPLO = 'U' or 'u', the leading n by n - upper triangular part of the array A must contain the upper - triangular part of the symmetric matrix and the strictly - lower triangular part of A is not referenced. - Before entry with UPLO = 'L' or 'l', the leading n by n - lower triangular part of the array A must contain the lower - triangular part of the symmetric matrix and the strictly - upper triangular part of A is not referenced. - Unchanged on exit. - - LDA - INTEGER. - On entry, LDA specifies the first dimension of A as declared - in the calling (sub) program. LDA must be at least - max( 1, n ). - Unchanged on exit. - - X - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCX ) ). 
- Before entry, the incremented array X must contain the n - element vector x. - Unchanged on exit. - - INCX - INTEGER. - On entry, INCX specifies the increment for the elements of - X. INCX must not be zero. - Unchanged on exit. - - BETA - DOUBLE PRECISION. - On entry, BETA specifies the scalar beta. When BETA is - supplied as zero then Y need not be set on input. - Unchanged on exit. - - Y - DOUBLE PRECISION array of dimension at least - ( 1 + ( n - 1 )*abs( INCY ) ). - Before entry, the incremented array Y must contain the n - element vector y. On exit, Y is overwritten by the updated - vector y. + Y - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. On exit, Y is overwritten by the updated + vector y. INCY - INTEGER. On entry, INCY specifies the increment for the elements of @@ -1567,13 +1909,14 @@ L40: info = 10; } if (info != 0) { - xerbla_("DSYMV ", &info); + xerbla_("CHEMV ", &info); return 0; } /* Quick return if possible. */ - if (*n == 0 || *alpha == 0. && *beta == 1.) { + if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && + beta->i == 0.f)) { return 0; } @@ -1598,41 +1941,53 @@ L40: First form y := beta*y. */ - if (*beta != 1.) { + if (beta->r != 1.f || beta->i != 0.f) { if (*incy == 1) { - if (*beta == 0.) { + if (beta->r == 0.f && beta->i == 0.f) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = 0.; + i__2 = i__; + y[i__2].r = 0.f, y[i__2].i = 0.f; /* L10: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - y[i__] = *beta * y[i__]; + i__2 = i__; + i__3 = i__; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; /* L20: */ } } } else { iy = ky; - if (*beta == 0.) 
{ + if (beta->r == 0.f && beta->i == 0.f) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = 0.; + i__2 = iy; + y[i__2].r = 0.f, y[i__2].i = 0.f; iy += *incy; /* L30: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - y[iy] = *beta * y[iy]; + i__2 = iy; + i__3 = iy; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; iy += *incy; /* L40: */ } } } } - if (*alpha == 0.) { + if (alpha->r == 0.f && alpha->i == 0.f) { return 0; } if (lsame_(uplo, "U")) { @@ -1642,15 +1997,39 @@ L40: if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; + i__2 = j; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[i__]; + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; /* L50: */ } - y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + r__1 = a[i__4].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__2].r = q__1.r, 
y[i__2].i = q__1.i; /* L60: */ } } else { @@ -1658,19 +2037,43 @@ L40: jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; ix = kx; iy = ky; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - y[iy] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[ix]; + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; ix += *incx; iy += *incy; /* L70: */ } - y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + r__1 = a[i__4].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; jx += *incx; jy += *incy; /* L80: */ @@ -1683,16 +2086,42 @@ L40: if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[j]; - temp2 = 0.; - y[j] += temp1 * a[j + j * a_dim1]; + i__2 = j; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + r__1 = a[i__4].r; + q__2.r = r__1 * 
temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { - y[i__] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[i__]; + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; /* L90: */ } - y[j] += *alpha * temp2; + i__2 = j; + i__3 = j; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; /* L100: */ } } else { @@ -1700,20 +2129,46 @@ L40: jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { - temp1 = *alpha * x[jx]; - temp2 = 0.; - y[jy] += temp1 * a[j + j * a_dim1]; + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__2 = jy; + i__3 = jy; + i__4 = j + j * a_dim1; + r__1 = a[i__4].r; + q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; ix = jx; iy = jy; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { ix += *incx; iy += *incy; - y[iy] += temp1 * a[i__ + j * a_dim1]; - temp2 += a[i__ + j * a_dim1] * x[ix]; + i__3 = iy; + i__4 = iy; + i__5 = i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + 
q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; /* L110: */ } - y[jy] += *alpha * temp2; + i__2 = jy; + i__3 = jy; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; jx += *incx; jy += *incy; /* L120: */ @@ -1723,20 +2178,24 @@ L40: return 0; -/* End of DSYMV . */ +/* End of CHEMV . */ -} /* dsymv_ */ +} /* chemv_ */ -/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha, - doublereal *x, integer *incx, doublereal *y, integer *incy, - doublereal *a, integer *lda) +/* Subroutine */ int cher2_(char *uplo, integer *n, complex *alpha, complex * + x, integer *incx, complex *y, integer *incy, complex *a, integer *lda) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); /* Local variables */ static integer i__, j, ix, iy, jx, jy, kx, ky, info; - static doublereal temp1, temp2; + static complex temp1, temp2; extern logical lsame_(char *, char *); extern /* Subroutine */ int xerbla_(char *, integer *); @@ -1745,12 +2204,12 @@ L40: Purpose ======= - DSYR2 performs the symmetric rank 2 operation + CHER2 performs the hermitian rank 2 operation - A := alpha*x*y' + alpha*y*x' + A, + A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, where alpha is a scalar, x and y are n element vectors and A is an n - by n symmetric matrix. + by n hermitian matrix. 
Parameters ========== @@ -1773,11 +2232,11 @@ L40: N must be at least zero. Unchanged on exit. - ALPHA - DOUBLE PRECISION. + ALPHA - COMPLEX . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. - X - DOUBLE PRECISION array of dimension at least + X - COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. @@ -1788,7 +2247,7 @@ L40: X. INCX must not be zero. Unchanged on exit. - Y - DOUBLE PRECISION array of dimension at least + Y - COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCY ) ). Before entry, the incremented array Y must contain the n element vector y. @@ -1799,19 +2258,22 @@ L40: Y. INCY must not be zero. Unchanged on exit. - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + A - COMPLEX array of DIMENSION ( LDA, n ). Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array A must contain the upper - triangular part of the symmetric matrix and the strictly + triangular part of the hermitian matrix and the strictly lower triangular part of A is not referenced. On exit, the upper triangular part of the array A is overwritten by the upper triangular part of the updated matrix. Before entry with UPLO = 'L' or 'l', the leading n by n lower triangular part of the array A must contain the lower - triangular part of the symmetric matrix and the strictly + triangular part of the hermitian matrix and the strictly upper triangular part of A is not referenced. On exit, the lower triangular part of the array A is overwritten by the lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. LDA - INTEGER. 
On entry, LDA specifies the first dimension of A as declared @@ -1853,13 +2315,13 @@ L40: info = 9; } if (info != 0) { - xerbla_("DSYR2 ", &info); + xerbla_("CHER2 ", &info); return 0; } /* Quick return if possible. */ - if (*n == 0 || *alpha == 0.) { + if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f) { return 0; } @@ -1896,34 +2358,116 @@ L40: if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (x[j] != 0. || y[j] != 0.) { - temp1 = *alpha * y[j]; - temp2 = *alpha * x[j]; + i__2 = j; + i__3 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f + || y[i__3].i != 0.f)) { + r_cnjg(&q__2, &y[j]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; i__2 = j; + q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * - temp1 + y[i__] * temp2; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + q__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + + q__3.i; + i__6 = i__; + q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + q__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; /* L10: */ } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + q__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = j; + q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + q__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = a[i__3].r + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } else { 
+ i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0. || y[jy] != 0.) { - temp1 = *alpha * y[jy]; - temp2 = *alpha * x[jx]; + i__2 = jx; + i__3 = jy; + if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f + || y[i__3].i != 0.f)) { + r_cnjg(&q__2, &y[jy]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__2 = jx; + q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; ix = kx; iy = ky; - i__2 = j; + i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * - temp1 + y[iy] * temp2; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + q__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + + q__3.i; + i__6 = iy; + q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + q__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; ix += *incx; iy += *incy; /* L30: */ } + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + q__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = jy; + q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + q__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = a[i__3].r + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; } jx += *incx; jy += *incy; @@ -1937,34 +2481,116 @@ L40: if (*incx == 1 && 
*incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (x[j] != 0. || y[j] != 0.) { - temp1 = *alpha * y[j]; - temp2 = *alpha * x[j]; + i__2 = j; + i__3 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f + || y[i__3].i != 0.f)) { + r_cnjg(&q__2, &y[j]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__2 = j; + q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = j; + q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + q__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = j; + q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + q__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = a[i__3].r + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * - temp1 + y[i__] * temp2; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = i__; + q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + q__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + + q__3.i; + i__6 = i__; + q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + q__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; /* L50: */ } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; } /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0. || y[jy] != 0.) 
{ - temp1 = *alpha * y[jy]; - temp2 = *alpha * x[jx]; + i__2 = jx; + i__3 = jy; + if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f + || y[i__3].i != 0.f)) { + r_cnjg(&q__2, &y[jy]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = + alpha->r * q__2.i + alpha->i * q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__2 = jx; + q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, + q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2] + .r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + i__4 = jx; + q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, + q__2.i = x[i__4].r * temp1.i + x[i__4].i * + temp1.r; + i__5 = jy; + q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, + q__3.i = y[i__5].r * temp2.i + y[i__5].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = a[i__3].r + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; ix = jx; iy = jy; i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * - temp1 + y[iy] * temp2; + for (i__ = j + 1; i__ <= i__2; ++i__) { ix += *incx; iy += *incy; + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + i__5 = ix; + q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, + q__3.i = x[i__5].r * temp1.i + x[i__5].i * + temp1.r; + q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + + q__3.i; + i__6 = iy; + q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, + q__4.i = y[i__6].r * temp2.i + y[i__6].i * + temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; /* L70: */ } + } else { + i__2 = j + j * a_dim1; + i__3 = j + j * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; } jx += *incx; jy += *incy; @@ -1975,21 +2601,26 @@ L40: return 0; -/* End of DSYR2 . */ +/* End of CHER2 . 
*/ -} /* dsyr2_ */ +} /* cher2_ */ -/* Subroutine */ int dsyr2k_(char *uplo, char *trans, integer *n, integer *k, - doublereal *alpha, doublereal *a, integer *lda, doublereal *b, - integer *ldb, doublereal *beta, doublereal *c__, integer *ldc) +/* Subroutine */ int cher2k_(char *uplo, char *trans, integer *n, integer *k, + complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, + real *beta, complex *c__, integer *ldc) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3; + i__3, i__4, i__5, i__6, i__7; + real r__1; + complex q__1, q__2, q__3, q__4, q__5, q__6; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); /* Local variables */ static integer i__, j, l, info; - static doublereal temp1, temp2; + static complex temp1, temp2; extern logical lsame_(char *, char *); static integer nrowa; static logical upper; @@ -2000,17 +2631,17 @@ L40: Purpose ======= - DSYR2K performs one of the symmetric rank 2k operations + CHER2K performs one of the hermitian rank 2k operations - C := alpha*A*B' + alpha*B*A' + beta*C, + C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, or - C := alpha*A'*B + alpha*B'*A + beta*C, + C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, - where alpha and beta are scalars, C is an n by n symmetric matrix - and A and B are n by k matrices in the first case and k by n - matrices in the second case. + where alpha and beta are scalars with beta real, C is an n by n + hermitian matrix and A and B are n by k matrices in the first case + and k by n matrices in the second case. Parameters ========== @@ -2032,14 +2663,13 @@ L40: On entry, TRANS specifies the operation to be performed as follows: - TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + - beta*C. - - TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + - beta*C. + TRANS = 'N' or 'n' C := alpha*A*conjg( B' ) + + conjg( alpha )*B*conjg( A' ) + + beta*C. 
- TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + - beta*C. + TRANS = 'C' or 'c' C := alpha*conjg( A' )*B + + conjg( alpha )*conjg( B' )*A + + beta*C. Unchanged on exit. @@ -2051,15 +2681,15 @@ L40: K - INTEGER. On entry with TRANS = 'N' or 'n', K specifies the number of columns of the matrices A and B, and on entry with - TRANS = 'T' or 't' or 'C' or 'c', K specifies the number - of rows of the matrices A and B. K must be at least zero. + TRANS = 'C' or 'c', K specifies the number of rows of the + matrices A and B. K must be at least zero. Unchanged on exit. - ALPHA - DOUBLE PRECISION. + ALPHA - COMPLEX . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. - A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is + A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is k when TRANS = 'N' or 'n', and is n otherwise. Before entry with TRANS = 'N' or 'n', the leading n by k part of the array A must contain the matrix A, otherwise @@ -2074,7 +2704,7 @@ L40: be at least max( 1, k ). Unchanged on exit. - B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is + B - COMPLEX array of DIMENSION ( LDB, kb ), where kb is k when TRANS = 'N' or 'n', and is n otherwise. Before entry with TRANS = 'N' or 'n', the leading n by k part of the array B must contain the matrix B, otherwise @@ -2089,23 +2719,26 @@ L40: be at least max( 1, k ). Unchanged on exit. - BETA - DOUBLE PRECISION. + BETA - REAL . On entry, BETA specifies the scalar beta. Unchanged on exit. - C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). + C - COMPLEX array of DIMENSION ( LDC, n ). Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array C must contain the upper - triangular part of the symmetric matrix and the strictly + triangular part of the hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. 
Before entry with UPLO = 'L' or 'l', the leading n by n lower triangular part of the array C must contain the lower - triangular part of the symmetric matrix and the strictly + triangular part of the hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. LDC - INTEGER. On entry, LDC specifies the first dimension of C as declared @@ -2116,13 +2749,15 @@ L40: Level 3 Blas routine. - -- Written on 8-February-1989. Jack Dongarra, Argonne National Laboratory. Iain Duff, AERE Harwell. Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. + Ed Anderson, Cray Research Inc. + Test the input parameters. */ @@ -2150,7 +2785,7 @@ L40: if (! upper && ! lsame_(uplo, "L")) { info = 1; } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { + "C")) { info = 2; } else if (*n < 0) { info = 3; @@ -2164,26 +2799,28 @@ L40: info = 12; } if (info != 0) { - xerbla_("DSYR2K", &info); + xerbla_("CHER2K", &info); return 0; } /* Quick return if possible. */ - if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) && *beta == + 1.f) { return 0; } /* And when alpha.eq.zero. */ - if (*alpha == 0.) { + if (alpha->r == 0.f && alpha->i == 0.f) { if (upper) { - if (*beta == 0.) 
{ + if (*beta == 0.f) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; /* L10: */ } /* L20: */ @@ -2191,21 +2828,30 @@ L40: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j; + i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; /* L30: */ } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; /* L40: */ } } } else { - if (*beta == 0.) { + if (*beta == 0.f) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; /* L50: */ } /* L60: */ @@ -2213,9 +2859,17 @@ L40: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; /* L70: */ } /* L80: */ @@ -2229,36 +2883,90 @@ L40: if (lsame_(trans, "N")) { -/* Form C := alpha*A*B' + alpha*B*A' + C. */ +/* + Form C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + + C. +*/ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) 
{ + if (*beta == 0.f) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; /* L90: */ } - } else if (*beta != 1.) { - i__2 = j; + } else if (*beta != 1.f) { + i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; /* L100: */ } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; } i__2 = *k; for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) { - temp1 = *alpha * b[j + l * b_dim1]; - temp2 = *alpha * a[j + l * a_dim1]; - i__3 = j; + i__3 = j + l * a_dim1; + i__4 = j + l * b_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != + 0.f || b[i__4].i != 0.f)) { + r_cnjg(&q__2, &b[j + l * b_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, + q__1.i = alpha->r * q__2.i + alpha->i * + q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__3 = j + l * a_dim1; + q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + q__2.i = alpha->r * a[i__3].i + alpha->i * a[ + i__3].r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; + i__3 = j - 1; for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ - i__ + l * a_dim1] * temp1 + b[i__ + l * - b_dim1] * temp2; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__3.r = a[i__6].r * temp1.r - a[i__6].i * + temp1.i, q__3.i = a[i__6].r * temp1.i + a[ + i__6].i * temp1.r; + q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5] + .i + q__3.i; + i__7 = i__ + l * b_dim1; + q__4.r = b[i__7].r * temp2.r - b[i__7].i * + temp2.i, 
q__4.i = b[i__7].r * temp2.i + b[ + i__7].i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + + q__4.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; /* L110: */ } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, + q__2.i = a[i__5].r * temp1.i + a[i__5].i * + temp1.r; + i__6 = j + l * b_dim1; + q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, + q__3.i = b[i__6].r * temp2.i + b[i__6].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; } /* L120: */ } @@ -2267,31 +2975,82 @@ L40: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { + if (*beta == 0.f) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; /* L140: */ } - } else if (*beta != 1.) { + } else if (*beta != 1.f) { i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; /* L150: */ } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; } i__2 = *k; for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) 
{ - temp1 = *alpha * b[j + l * b_dim1]; - temp2 = *alpha * a[j + l * a_dim1]; + i__3 = j + l * a_dim1; + i__4 = j + l * b_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != + 0.f || b[i__4].i != 0.f)) { + r_cnjg(&q__2, &b[j + l * b_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, + q__1.i = alpha->r * q__2.i + alpha->i * + q__2.r; + temp1.r = q__1.r, temp1.i = q__1.i; + i__3 = j + l * a_dim1; + q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, + q__2.i = alpha->r * a[i__3].i + alpha->i * a[ + i__3].r; + r_cnjg(&q__1, &q__2); + temp2.r = q__1.r, temp2.i = q__1.i; i__3 = *n; - for (i__ = j; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ - i__ + l * a_dim1] * temp1 + b[i__ + l * - b_dim1] * temp2; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__3.r = a[i__6].r * temp1.r - a[i__6].i * + temp1.i, q__3.i = a[i__6].r * temp1.i + a[ + i__6].i * temp1.r; + q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5] + .i + q__3.i; + i__7 = i__ + l * b_dim1; + q__4.r = b[i__7].r * temp2.r - b[i__7].i * + temp2.i, q__4.i = b[i__7].r * temp2.i + b[ + i__7].i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + + q__4.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; /* L160: */ } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, + q__2.i = a[i__5].r * temp1.i + a[i__5].i * + temp1.r; + i__6 = j + l * b_dim1; + q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, + q__3.i = b[i__6].r * temp2.i + b[i__6].i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__1 = c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; } /* L170: */ } @@ -2300,27 +3059,96 @@ L40: } } else { -/* Form C := alpha*A'*B + alpha*B'*A + C. */ +/* + Form C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + + C. 
+*/ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - temp1 = 0.; - temp2 = 0.; + temp1.r = 0.f, temp1.i = 0.f; + temp2.r = 0.f, temp2.i = 0.f; i__3 = *k; for (l = 1; l <= i__3; ++l) { - temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; - temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, + q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] + .r; + q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i; + temp1.r = q__1.r, temp1.i = q__1.i; + r_cnjg(&q__3, &b[l + i__ * b_dim1]); + i__4 = l + j * a_dim1; + q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, + q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] + .r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; /* L190: */ } - if (*beta == 0.) { - c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * - temp2; + if (i__ == j) { + if (*beta == 0.f) { + i__3 = j + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + r__1 = q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } else { + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + r__1 = *beta * c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } } else { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] - + *alpha * temp1 + *alpha * temp2; + if (*beta == 0.f) { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * 
temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__3.r = *beta * c__[i__4].r, q__3.i = *beta * + c__[i__4].i; + q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__4.i = alpha->r * temp1.i + alpha->i * + temp1.r; + q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + + q__4.i; + r_cnjg(&q__6, alpha); + q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, + q__5.i = q__6.r * temp2.i + q__6.i * + temp2.r; + q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + + q__5.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } } /* L200: */ } @@ -2331,20 +3159,86 @@ L40: for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { - temp1 = 0.; - temp2 = 0.; + temp1.r = 0.f, temp1.i = 0.f; + temp2.r = 0.f, temp2.i = 0.f; i__3 = *k; for (l = 1; l <= i__3; ++l) { - temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; - temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, + q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4] + .r; + q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i; + temp1.r = q__1.r, temp1.i = q__1.i; + r_cnjg(&q__3, &b[l + i__ * b_dim1]); + i__4 = l + j * a_dim1; + q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, + q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] + .r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; /* L220: */ } - if (*beta == 0.) 
{ - c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * - temp2; + if (i__ == j) { + if (*beta == 0.f) { + i__3 = j + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + r__1 = q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } else { + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + r__1 = *beta * c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; + } } else { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] - + *alpha * temp1 + *alpha * temp2; + if (*beta == 0.f) { + i__3 = i__ + j * c_dim1; + q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__2.i = alpha->r * temp1.i + alpha->i * + temp1.r; + r_cnjg(&q__4, alpha); + q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, + q__3.i = q__4.r * temp2.i + q__4.i * + temp2.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } else { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__3.r = *beta * c__[i__4].r, q__3.i = *beta * + c__[i__4].i; + q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, + q__4.i = alpha->r * temp1.i + alpha->i * + temp1.r; + q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + + q__4.i; + r_cnjg(&q__6, alpha); + q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, + q__5.i = q__6.r * temp2.i + q__6.i * + temp2.r; + q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + + q__5.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; + } } /* L230: */ } @@ -2355,22 +3249,29 @@ L40: return 0; -/* End of DSYR2K. */ +/* End of CHER2K. 
*/ -} /* dsyr2k_ */ +} /* cher2k_ */ -/* Subroutine */ int dsyrk_(char *uplo, char *trans, integer *n, integer *k, - doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, - doublereal *c__, integer *ldc) +/* Subroutine */ int cherk_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, complex *a, integer *lda, real *beta, complex *c__, + integer *ldc) { /* System generated locals */ - integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + real r__1; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); /* Local variables */ static integer i__, j, l, info; - static doublereal temp; + static complex temp; extern logical lsame_(char *, char *); static integer nrowa; + static real rtemp; static logical upper; extern /* Subroutine */ int xerbla_(char *, integer *); @@ -2379,17 +3280,17 @@ L40: Purpose ======= - DSYRK performs one of the symmetric rank k operations + CHERK performs one of the hermitian rank k operations - C := alpha*A*A' + beta*C, + C := alpha*A*conjg( A' ) + beta*C, or - C := alpha*A'*A + beta*C, + C := alpha*conjg( A' )*A + beta*C, - where alpha and beta are scalars, C is an n by n symmetric matrix - and A is an n by k matrix in the first case and a k by n matrix - in the second case. + where alpha and beta are real scalars, C is an n by n hermitian + matrix and A is an n by k matrix in the first case and a k by n + matrix in the second case. Parameters ========== @@ -2411,11 +3312,9 @@ L40: On entry, TRANS specifies the operation to be performed as follows: - TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. - - TRANS = 'T' or 't' C := alpha*A'*A + beta*C. + TRANS = 'N' or 'n' C := alpha*A*conjg( A' ) + beta*C. - TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. + TRANS = 'C' or 'c' C := alpha*conjg( A' )*A + beta*C. Unchanged on exit. @@ -2427,15 +3326,15 @@ L40: K - INTEGER. 
On entry with TRANS = 'N' or 'n', K specifies the number of columns of the matrix A, and on entry with - TRANS = 'T' or 't' or 'C' or 'c', K specifies the number - of rows of the matrix A. K must be at least zero. + TRANS = 'C' or 'c', K specifies the number of rows of the + matrix A. K must be at least zero. Unchanged on exit. - ALPHA - DOUBLE PRECISION. + ALPHA - REAL . On entry, ALPHA specifies the scalar alpha. Unchanged on exit. - A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is + A - COMPLEX array of DIMENSION ( LDA, ka ), where ka is k when TRANS = 'N' or 'n', and is n otherwise. Before entry with TRANS = 'N' or 'n', the leading n by k part of the array A must contain the matrix A, otherwise @@ -2450,23 +3349,26 @@ L40: be at least max( 1, k ). Unchanged on exit. - BETA - DOUBLE PRECISION. + BETA - REAL . On entry, BETA specifies the scalar beta. Unchanged on exit. - C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). + C - COMPLEX array of DIMENSION ( LDC, n ). Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array C must contain the upper - triangular part of the symmetric matrix and the strictly + triangular part of the hermitian matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with UPLO = 'L' or 'l', the leading n by n lower triangular part of the array C must contain the lower - triangular part of the symmetric matrix and the strictly + triangular part of the hermitian matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. + Note that the imaginary parts of the diagonal elements need + not be set, they are assumed to be zero, and on exit they + are set to zero. LDC - INTEGER. 
On entry, LDC specifies the first dimension of C as declared @@ -2483,6 +3385,9 @@ L40: Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. + Ed Anderson, Cray Research Inc. + Test the input parameters. */ @@ -2507,7 +3412,7 @@ L40: if (! upper && ! lsame_(uplo, "L")) { info = 1; } else if (! lsame_(trans, "N") && ! lsame_(trans, - "T") && ! lsame_(trans, "C")) { + "C")) { info = 2; } else if (*n < 0) { info = 3; @@ -2519,26 +3424,27 @@ L40: info = 10; } if (info != 0) { - xerbla_("DSYRK ", &info); + xerbla_("CHERK ", &info); return 0; } /* Quick return if possible. */ - if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { return 0; } /* And when alpha.eq.zero. */ - if (*alpha == 0.) { + if (*alpha == 0.f) { if (upper) { - if (*beta == 0.) { + if (*beta == 0.f) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; /* L10: */ } /* L20: */ @@ -2546,21 +3452,30 @@ L40: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j; + i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; /* L30: */ } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; /* L40: */ } } } else { - if (*beta == 0.) 
{ + if (*beta == 0.f) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; /* L50: */ } /* L60: */ @@ -2568,9 +3483,17 @@ L40: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; /* L70: */ } /* L80: */ @@ -2584,34 +3507,66 @@ L40: if (lsame_(trans, "N")) { -/* Form C := alpha*A*A' + beta*C. */ +/* Form C := alpha*A*conjg( A' ) + beta*C. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { + if (*beta == 0.f) { i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; /* L90: */ } - } else if (*beta != 1.) { - i__2 = j; + } else if (*beta != 1.f) { + i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; /* L100: */ } + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; } i__2 = *k; for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0.) 
{ - temp = *alpha * a[j + l * a_dim1]; - i__3 = j; + i__3 = j + l * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + r_cnjg(&q__2, &a[j + l * a_dim1]); + q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = j - 1; for (i__ = 1; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; /* L110: */ } + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = i__ + l * a_dim1; + q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__1.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + r__1 = c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; } /* L120: */ } @@ -2620,27 +3575,59 @@ L40: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (*beta == 0.) { + if (*beta == 0.f) { i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = 0.; + i__3 = i__ + j * c_dim1; + c__[i__3].r = 0.f, c__[i__3].i = 0.f; /* L140: */ } - } else if (*beta != 1.) { + } else if (*beta != 1.f) { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[ + i__4].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; /* L150: */ } + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; } i__2 = *k; for (l = 1; l <= i__2; ++l) { - if (a[j + l * a_dim1] != 0.) 
{ - temp = *alpha * a[j + l * a_dim1]; + i__3 = j + l * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + r_cnjg(&q__2, &a[j + l * a_dim1]); + q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = j + j * c_dim1; + i__4 = j + j * c_dim1; + i__5 = j + l * a_dim1; + q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__1.i = temp.r * a[i__5].i + temp.i * a[i__5] + .r; + r__1 = c__[i__4].r + q__1.r; + c__[i__3].r = r__1, c__[i__3].i = 0.f; i__3 = *n; - for (i__ = j; i__ <= i__3; ++i__) { - c__[i__ + j * c_dim1] += temp * a[i__ + l * - a_dim1]; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * c_dim1; + i__6 = i__ + l * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, + q__2.i = temp.r * a[i__6].i + temp.i * a[ + i__6].r; + q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5] + .i + q__2.i; + c__[i__4].r = q__1.r, c__[i__4].i = q__1.i; /* L160: */ } } @@ -2651,89 +3638,361 @@ L40: } } else { -/* Form C := alpha*A'*A + beta*C. */ +/* Form C := alpha*conjg( A' )*A + beta*C. */ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = j; + i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - temp = 0.; + temp.r = 0.f, temp.i = 0.f; i__3 = *k; for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, + q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; /* L190: */ } - if (*beta == 0.) 
{ - c__[i__ + j * c_dim1] = *alpha * temp; + if (*beta == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; + i__3 = i__ + j * c_dim1; + q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; } /* L200: */ } + rtemp = 0.f; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + r_cnjg(&q__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i = + q__3.r * a[i__3].i + q__3.i * a[i__3].r; + q__1.r = rtemp + q__2.r, q__1.i = q__2.i; + rtemp = q__1.r; /* L210: */ + } + if (*beta == 0.f) { + i__2 = j + j * c_dim1; + r__1 = *alpha * rtemp; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } +/* L220: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { - temp = 0.; + rtemp = 0.f; + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + r_cnjg(&q__3, &a[l + j * a_dim1]); + i__3 = l + j * a_dim1; + q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i = + q__3.r * a[i__3].i + q__3.i * a[i__3].r; + q__1.r = rtemp + q__2.r, q__1.i = q__2.i; + rtemp = q__1.r; +/* L230: */ + } + if (*beta == 0.f) { + i__2 = j + j * c_dim1; + r__1 = *alpha * rtemp; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } else { + i__2 = j + j * c_dim1; + i__3 = j + j * c_dim1; + r__1 = *alpha * rtemp + *beta * c__[i__3].r; + c__[i__2].r = r__1, c__[i__2].i = 0.f; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp.r = 0.f, temp.i = 0.f; i__3 = *k; for (l = 1; l <= i__3; ++l) { - temp += a[l + i__ * a_dim1] * a[l + 
j * a_dim1]; -/* L220: */ + r_cnjg(&q__3, &a[l + i__ * a_dim1]); + i__4 = l + j * a_dim1; + q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, + q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4] + .r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L240: */ } - if (*beta == 0.) { - c__[i__ + j * c_dim1] = *alpha * temp; + if (*beta == 0.f) { + i__3 = i__ + j * c_dim1; + q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; } else { - c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ - i__ + j * c_dim1]; + i__3 = i__ + j * c_dim1; + q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i; + i__4 = i__ + j * c_dim1; + q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; } -/* L230: */ +/* L250: */ } -/* L240: */ +/* L260: */ } } } return 0; -/* End of DSYRK . */ +/* End of CHERK . */ -} /* dsyrk_ */ +} /* cherk_ */ -/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, doublereal *alpha, doublereal *a, integer * - lda, doublereal *b, integer *ldb) +/* Subroutine */ int cscal_(integer *n, complex *ca, complex *cx, integer * + incx) { /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + integer i__1, i__2, i__3, i__4; + complex q__1; + + /* Local variables */ + static integer i__, nincx; + + +/* + scales a vector by a constant. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = i__; + i__4 = i__; + q__1.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__1.i = ca->r * cx[ + i__4].i + ca->i * cx[i__4].r; + cx[i__3].r = q__1.r, cx[i__3].i = q__1.i; +/* L10: */ + } + return 0; + +/* code for increment equal to 1 */ + +L20: + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__1 = i__; + i__3 = i__; + q__1.r = ca->r * cx[i__3].r - ca->i * cx[i__3].i, q__1.i = ca->r * cx[ + i__3].i + ca->i * cx[i__3].r; + cx[i__1].r = q__1.r, cx[i__1].i = q__1.i; +/* L30: */ + } + return 0; +} /* cscal_ */ + +/* Subroutine */ int csscal_(integer *n, real *sa, complex *cx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + real r__1, r__2; + complex q__1; + + /* Builtin functions */ + double r_imag(complex *); + + /* Local variables */ + static integer i__, nincx; + + +/* + scales a complex vector by a real constant. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = i__; + i__4 = i__; + r__1 = *sa * cx[i__4].r; + r__2 = *sa * r_imag(&cx[i__]); + q__1.r = r__1, q__1.i = r__2; + cx[i__3].r = q__1.r, cx[i__3].i = q__1.i; +/* L10: */ + } + return 0; + +/* code for increment equal to 1 */ + +L20: + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__1 = i__; + i__3 = i__; + r__1 = *sa * cx[i__3].r; + r__2 = *sa * r_imag(&cx[i__]); + q__1.r = r__1, q__1.i = r__2; + cx[i__1].r = q__1.r, cx[i__1].i = q__1.i; +/* L30: */ + } + return 0; +} /* csscal_ */ + +/* Subroutine */ int cswap_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Local variables */ + static integer i__, ix, iy; + static complex ctemp; + + +/* + interchanges two vectors. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i; + i__2 = ix; + i__3 = iy; + cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i; + i__2 = iy; + cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i; + i__2 = i__; + i__3 = i__; + cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i; + i__2 = i__; + cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i; +/* L30: */ + } + return 0; +} /* cswap_ */ + +/* Subroutine */ int ctrmm_(char *side, char *uplo, char 
*transa, char *diag, + integer *m, integer *n, complex *alpha, complex *a, integer *lda, + complex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, + i__6; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); /* Local variables */ static integer i__, j, k, info; - static doublereal temp; - static logical lside; + static complex temp; extern logical lsame_(char *, char *); + static logical lside; static integer nrowa; static logical upper; extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; + static logical noconj, nounit; /* Purpose ======= - DTRMM performs one of the matrix-matrix operations + CTRMM performs one of the matrix-matrix operations - B := alpha*op( A )*B, or B := alpha*B*op( A ), + B := alpha*op( A )*B, or B := alpha*B*op( A ) where alpha is a scalar, B is an m by n matrix, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of - op( A ) = A or op( A ) = A'. + op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). Parameters ========== @@ -2766,7 +4025,7 @@ L40: TRANSA = 'T' or 't' op( A ) = A'. - TRANSA = 'C' or 'c' op( A ) = A'. + TRANSA = 'C' or 'c' op( A ) = conjg( A' ). Unchanged on exit. @@ -2791,13 +4050,13 @@ L40: at least zero. Unchanged on exit. - ALPHA - DOUBLE PRECISION. + ALPHA - COMPLEX . On entry, ALPHA specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. Unchanged on exit. - A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m + A - COMPLEX array of DIMENSION ( LDA, k ), where k is m when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. Before entry with UPLO = 'U' or 'u', the leading k by k upper triangular part of the array A must contain the upper @@ -2818,7 +4077,7 @@ L40: then LDA must be at least max( 1, n ). Unchanged on exit. - B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). 
+ B - COMPLEX array of DIMENSION ( LDB, n ). Before entry, the leading m by n part of the array B must contain the matrix B, and on exit is overwritten by the transformed matrix. @@ -2857,6 +4116,7 @@ L40: } else { nrowa = *n; } + noconj = lsame_(transa, "T"); nounit = lsame_(diag, "N"); upper = lsame_(uplo, "U"); @@ -2881,7 +4141,7 @@ L40: info = 11; } if (info != 0) { - xerbla_("DTRMM ", &info); + xerbla_("CTRMM ", &info); return 0; } @@ -2893,12 +4153,13 @@ L40: /* And when alpha.eq.zero. */ - if (*alpha == 0.) { + if (alpha->r == 0.f && alpha->i == 0.f) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = 0.; + i__3 = i__ + j * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; /* L10: */ } /* L20: */ @@ -2918,18 +4179,35 @@ L40: for (j = 1; j <= i__1; ++j) { i__2 = *m; for (k = 1; k <= i__2; ++k) { - if (b[k + j * b_dim1] != 0.) { - temp = *alpha * b[k + j * b_dim1]; + i__3 = k + j * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { + i__3 = k + j * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, q__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; i__3 = k - 1; for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * a[i__ + k * - a_dim1]; + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * a_dim1; + q__2.r = temp.r * a[i__6].r - temp.i * a[i__6] + .i, q__2.i = temp.r * a[i__6].i + + temp.i * a[i__6].r; + q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] + .i + q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; /* L30: */ } if (nounit) { - temp *= a[k + k * a_dim1]; + i__3 = k + k * a_dim1; + q__1.r = temp.r * a[i__3].r - temp.i * a[i__3] + .i, q__1.i = temp.r * a[i__3].i + + temp.i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; } - b[k + j * b_dim1] = temp; + i__3 = k + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; } /* L40: */ } @@ -2939,16 +4217,35 @@ L40: i__1 = *n; for (j = 1; j <= i__1; ++j) { for (k = 
*m; k >= 1; --k) { - if (b[k + j * b_dim1] != 0.) { - temp = *alpha * b[k + j * b_dim1]; - b[k + j * b_dim1] = temp; + i__2 = k + j * b_dim1; + if (b[i__2].r != 0.f || b[i__2].i != 0.f) { + i__2 = k + j * b_dim1; + q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2] + .i, q__1.i = alpha->r * b[i__2].i + + alpha->i * b[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + i__2 = k + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; if (nounit) { - b[k + j * b_dim1] *= a[k + k * a_dim1]; + i__2 = k + j * b_dim1; + i__3 = k + j * b_dim1; + i__4 = k + k * a_dim1; + q__1.r = b[i__3].r * a[i__4].r - b[i__3].i * + a[i__4].i, q__1.i = b[i__3].r * a[ + i__4].i + b[i__3].i * a[i__4].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; } i__2 = *m; for (i__ = k + 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * a[i__ + k * - a_dim1]; + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5] + .i, q__2.i = temp.r * a[i__5].i + + temp.i * a[i__5].r; + q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] + .i + q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; /* L60: */ } } @@ -2959,44 +4256,120 @@ L40: } } else { -/* Form B := alpha*A'*B. */ +/* Form B := alpha*A'*B or B := alpha*conjg( A' )*B. 
*/ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { - temp = b[i__ + j * b_dim1]; - if (nounit) { - temp *= a[i__ + i__ * a_dim1]; - } - i__2 = i__ - 1; - for (k = 1; k <= i__2; ++k) { - temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; + i__2 = i__ + j * b_dim1; + temp.r = b[i__2].r, temp.i = b[i__2].i; + if (noconj) { + if (nounit) { + i__2 = i__ + i__ * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2] + .i, q__1.i = temp.r * a[i__2].i + + temp.i * a[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + q__2.r = a[i__3].r * b[i__4].r - a[i__3].i * + b[i__4].i, q__2.i = a[i__3].r * b[ + i__4].i + a[i__3].i * b[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; /* L90: */ - } - b[i__ + j * b_dim1] = *alpha * temp; + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + r_cnjg(&q__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3] + .i, q__2.i = q__3.r * b[i__3].i + + q__3.i * b[i__3].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; /* L100: */ - } + } + } + i__2 = i__ + j * b_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; /* L110: */ + } +/* L120: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - temp = b[i__ + j * b_dim1]; - if (nounit) { - temp *= a[i__ + i__ * a_dim1]; - } - i__3 = *m; - for (k = i__ + 1; k <= i__3; ++k) { - temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L120: */ - } - b[i__ + j * b_dim1] = *alpha * 
temp; + i__3 = i__ + j * b_dim1; + temp.r = b[i__3].r, temp.i = b[i__3].i; + if (noconj) { + if (nounit) { + i__3 = i__ + i__ * a_dim1; + q__1.r = temp.r * a[i__3].r - temp.i * a[i__3] + .i, q__1.i = temp.r * a[i__3].i + + temp.i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * + b[i__5].i, q__2.i = a[i__4].r * b[ + i__5].i + a[i__4].i * b[i__5].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; /* L130: */ - } + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + r_cnjg(&q__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4] + .i, q__2.i = q__3.r * b[i__4].i + + q__3.i * b[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; /* L140: */ + } + } + i__3 = i__ + j * b_dim1; + q__1.r = alpha->r * temp.r - alpha->i * temp.i, + q__1.i = alpha->r * temp.i + alpha->i * + temp.r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L150: */ + } +/* L160: */ } } } @@ -3007,118 +4380,234 @@ L40: if (upper) { for (j = *n; j >= 1; --j) { - temp = *alpha; + temp.r = alpha->r, temp.i = alpha->i; if (nounit) { - temp *= a[j + j * a_dim1]; + i__1 = j + j * a_dim1; + q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + q__1.i = temp.r * a[i__1].i + temp.i * a[i__1] + .r; + temp.r = q__1.r, temp.i = q__1.i; } i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L150: */ + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + q__1.i = temp.r * b[i__3].i + temp.i * b[i__3] + .r; + b[i__2].r = q__1.r, 
b[i__2].i = q__1.i; +/* L170: */ } i__1 = j - 1; for (k = 1; k <= i__1; ++k) { - if (a[k + j * a_dim1] != 0.) { - temp = *alpha * a[k + j * a_dim1]; + i__2 = k + j * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + i__2 = k + j * a_dim1; + q__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2] + .i, q__1.i = alpha->r * a[i__2].i + + alpha->i * a[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L160: */ + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, q__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] + .i + q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L180: */ } } -/* L170: */ +/* L190: */ } -/* L180: */ +/* L200: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - temp = *alpha; + temp.r = alpha->r, temp.i = alpha->i; if (nounit) { - temp *= a[j + j * a_dim1]; + i__2 = j + j * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__1.i = temp.r * a[i__2].i + temp.i * a[i__2] + .r; + temp.r = q__1.r, temp.i = q__1.i; } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L190: */ + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + q__1.i = temp.r * b[i__4].i + temp.i * b[i__4] + .r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L210: */ } i__2 = *n; for (k = j + 1; k <= i__2; ++k) { - if (a[k + j * a_dim1] != 0.) 
{ - temp = *alpha * a[k + j * a_dim1]; + i__3 = k + j * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + i__3 = k + j * a_dim1; + q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3] + .i, q__1.i = alpha->r * a[i__3].i + + alpha->i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L200: */ + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, q__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] + .i + q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L220: */ } } -/* L210: */ +/* L230: */ } -/* L220: */ +/* L240: */ } } } else { -/* Form B := alpha*B*A'. */ +/* Form B := alpha*B*A' or B := alpha*B*conjg( A' ). */ if (upper) { i__1 = *n; for (k = 1; k <= i__1; ++k) { i__2 = k - 1; for (j = 1; j <= i__2; ++j) { - if (a[j + k * a_dim1] != 0.) { - temp = *alpha * a[j + k * a_dim1]; + i__3 = j + k * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + if (noconj) { + i__3 = j + k * a_dim1; + q__1.r = alpha->r * a[i__3].r - alpha->i * a[ + i__3].i, q__1.i = alpha->r * a[i__3] + .i + alpha->i * a[i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[j + k * a_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * + q__2.i, q__1.i = alpha->r * q__2.i + + alpha->i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L230: */ + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, q__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5] + .i + q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L250: */ } } -/* L240: */ +/* L260: */ } - temp = *alpha; + temp.r = alpha->r, temp.i = alpha->i; if 
(nounit) { - temp *= a[k + k * a_dim1]; + if (noconj) { + i__2 = k + k * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[k + k * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } } - if (temp != 1.) { + if (temp.r != 1.f || temp.i != 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L250: */ + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + q__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L270: */ } } -/* L260: */ +/* L280: */ } } else { for (k = *n; k >= 1; --k) { i__1 = *n; for (j = k + 1; j <= i__1; ++j) { - if (a[j + k * a_dim1] != 0.) { - temp = *alpha * a[j + k * a_dim1]; + i__2 = j + k * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + if (noconj) { + i__2 = j + k * a_dim1; + q__1.r = alpha->r * a[i__2].r - alpha->i * a[ + i__2].i, q__1.i = alpha->r * a[i__2] + .i + alpha->i * a[i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[j + k * a_dim1]); + q__1.r = alpha->r * q__2.r - alpha->i * + q__2.i, q__1.i = alpha->r * q__2.i + + alpha->i * q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] += temp * b[i__ + k * - b_dim1]; -/* L270: */ + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, q__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4] + .i + q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L290: */ } } -/* L280: */ +/* L300: */ } - temp = *alpha; + temp.r = alpha->r, temp.i = alpha->i; if (nounit) { - temp *= a[k + k * a_dim1]; + if 
(noconj) { + i__1 = k + k * a_dim1; + q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + q__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[k + k * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } } - if (temp != 1.) { + if (temp.r != 1.f || temp.i != 0.f) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L290: */ + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + q__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L310: */ } } -/* L300: */ +/* L320: */ } } } @@ -3126,31 +4615,35 @@ L40: return 0; -/* End of DTRMM . */ +/* End of CTRMM . */ -} /* dtrmm_ */ +} /* ctrmm_ */ -/* Subroutine */ int dtrmv_(char *uplo, char *trans, char *diag, integer *n, - doublereal *a, integer *lda, doublereal *x, integer *incx) +/* Subroutine */ int ctrmv_(char *uplo, char *trans, char *diag, integer *n, + complex *a, integer *lda, complex *x, integer *incx) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); /* Local variables */ static integer i__, j, ix, jx, kx, info; - static doublereal temp; + static complex temp; extern logical lsame_(char *, char *); extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; + static logical noconj, nounit; /* Purpose ======= - DTRMV performs one of the matrix-vector operations + CTRMV performs one of the matrix-vector operations - x := A*x, or x := A'*x, + x := A*x, or x := A'*x, or x := conjg( A' )*x, where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix. 
@@ -3176,7 +4669,7 @@ L40: TRANS = 'T' or 't' x := A'*x. - TRANS = 'C' or 'c' x := A'*x. + TRANS = 'C' or 'c' x := conjg( A' )*x. Unchanged on exit. @@ -3196,7 +4689,7 @@ L40: N must be at least zero. Unchanged on exit. - A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + A - COMPLEX array of DIMENSION ( LDA, n ). Before entry with UPLO = 'U' or 'u', the leading n by n upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of @@ -3215,7 +4708,7 @@ L40: max( 1, n ). Unchanged on exit. - X - DOUBLE PRECISION array of dimension at least + X - COMPLEX array of dimension at least ( 1 + ( n - 1 )*abs( INCX ) ). Before entry, the incremented array X must contain the n element vector x. On exit, X is overwritten with the @@ -3263,7 +4756,7 @@ L40: info = 8; } if (info != 0) { - xerbla_("DTRMV ", &info); + xerbla_("CTRMV ", &info); return 0; } @@ -3273,6 +4766,7 @@ L40: return 0; } + noconj = lsame_(trans, "T"); nounit = lsame_(diag, "N"); /* @@ -3299,15 +4793,31 @@ L40: if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (x[j] != 0.) { - temp = x[j]; + i__2 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - x[i__] += temp * a[i__ + j * a_dim1]; + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + + q__2.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; /* L10: */ } if (nounit) { - x[j] *= a[j + j * a_dim1]; + i__2 = j; + i__3 = j; + i__4 = j + j * a_dim1; + q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ + i__4].i, q__1.i = x[i__3].r * a[i__4].i + + x[i__3].i * a[i__4].r; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; } } /* L20: */ @@ -3316,17 +4826,33 @@ L40: jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (x[jx] != 0.) 
{ - temp = x[jx]; + i__2 = jx; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; ix = kx; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - x[ix] += temp * a[i__ + j * a_dim1]; + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + + q__2.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; ix += *incx; /* L30: */ } if (nounit) { - x[jx] *= a[j + j * a_dim1]; + i__2 = jx; + i__3 = jx; + i__4 = j + j * a_dim1; + q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[ + i__4].i, q__1.i = x[i__3].r * a[i__4].i + + x[i__3].i * a[i__4].r; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; } } jx += *incx; @@ -3336,15 +4862,31 @@ L40: } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { - if (x[j] != 0.) { - temp = x[j]; + i__1 = j; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { - x[i__] += temp * a[i__ + j * a_dim1]; -/* L50: */ - } - if (nounit) { - x[j] *= a[j + j * a_dim1]; + i__2 = i__; + i__3 = i__; + i__4 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + q__2.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + + q__2.i; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; +/* L50: */ + } + if (nounit) { + i__1 = j; + i__2 = j; + i__3 = j + j * a_dim1; + q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, q__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; } } /* L60: */ @@ -3353,17 +4895,33 @@ L40: kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { - if (x[jx] != 0.) 
{ - temp = x[jx]; + i__1 = jx; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; ix = kx; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { - x[ix] += temp * a[i__ + j * a_dim1]; + i__2 = ix; + i__3 = ix; + i__4 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + q__2.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + + q__2.i; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; ix -= *incx; /* L70: */ } if (nounit) { - x[jx] *= a[j + j * a_dim1]; + i__1 = jx; + i__2 = jx; + i__3 = j + j * a_dim1; + q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, q__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; } } jx -= *incx; @@ -3373,74 +4931,214 @@ L40: } } else { -/* Form x := A'*x. */ +/* Form x := A'*x or x := conjg( A' )*x. */ if (lsame_(uplo, "U")) { if (*incx == 1) { for (j = *n; j >= 1; --j) { - temp = x[j]; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - for (i__ = j - 1; i__ >= 1; --i__) { - temp += a[i__ + j * a_dim1] * x[i__]; + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + if (nounit) { + i__1 = j + j * a_dim1; + q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + q__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = q__1.r, temp.i = q__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__ + j * a_dim1; + i__2 = i__; + q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, q__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; /* L90: */ - } - x[j] = temp; + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__1 = i__; + q__2.r 
= q__3.r * x[i__1].r - q__3.i * x[i__1].i, + q__2.i = q__3.r * x[i__1].i + q__3.i * x[ + i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; /* L100: */ + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; +/* L110: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { - temp = x[jx]; + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; ix = jx; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - for (i__ = j - 1; i__ >= 1; --i__) { - ix -= *incx; - temp += a[i__ + j * a_dim1] * x[ix]; -/* L110: */ + if (noconj) { + if (nounit) { + i__1 = j + j * a_dim1; + q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, + q__1.i = temp.r * a[i__1].i + temp.i * a[ + i__1].r; + temp.r = q__1.r, temp.i = q__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = i__ + j * a_dim1; + i__2 = ix; + q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, q__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L120: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__1 = ix; + q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, + q__2.i = q__3.r * x[i__1].i + q__3.i * x[ + i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L130: */ + } } - x[jx] = temp; + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; jx -= *incx; -/* L120: */ +/* L140: */ } } } else { if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - temp = x[j]; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - temp += a[i__ + j * a_dim1] * x[i__]; -/* L130: */ + i__2 = j; + 
temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + if (nounit) { + i__2 = j + j * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, q__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L150: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[ + i__3].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L160: */ + } } - x[j] = temp; -/* L140: */ + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; +/* L170: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { - temp = x[jx]; + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; ix = jx; - if (nounit) { - temp *= a[j + j * a_dim1]; - } - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - ix += *incx; - temp += a[i__ + j * a_dim1] * x[ix]; -/* L150: */ + if (noconj) { + if (nounit) { + i__2 = j + j * a_dim1; + q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__1.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = i__ + j * a_dim1; + i__4 = ix; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, q__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = 
q__1.r, temp.i = q__1.i; +/* L180: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[ + i__3].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L190: */ + } } - x[jx] = temp; + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; jx += *incx; -/* L160: */ +/* L200: */ } } } @@ -3448,40 +5146,45 @@ L40: return 0; -/* End of DTRMV . */ +/* End of CTRMV . */ -} /* dtrmv_ */ +} /* ctrmv_ */ -/* Subroutine */ int dtrsm_(char *side, char *uplo, char *transa, char *diag, - integer *m, integer *n, doublereal *alpha, doublereal *a, integer * - lda, doublereal *b, integer *ldb) +/* Subroutine */ int ctrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, complex *alpha, complex *a, integer *lda, + complex *b, integer *ldb) { /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, + i__6, i__7; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *); /* Local variables */ static integer i__, j, k, info; - static doublereal temp; - static logical lside; + static complex temp; extern logical lsame_(char *, char *); + static logical lside; static integer nrowa; static logical upper; extern /* Subroutine */ int xerbla_(char *, integer *); - static logical nounit; + static logical noconj, nounit; /* Purpose ======= - DTRSM solves one of the matrix equations + CTRSM solves one of the matrix equations op( A )*X = alpha*B, or X*op( A ) = alpha*B, where alpha 
is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op( A ) is one of - op( A ) = A or op( A ) = A'. + op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). The matrix X is overwritten on B. @@ -3516,7 +5219,7 @@ L40: TRANSA = 'T' or 't' op( A ) = A'. - TRANSA = 'C' or 'c' op( A ) = A'. + TRANSA = 'C' or 'c' op( A ) = conjg( A' ). Unchanged on exit. @@ -3541,13 +5244,13 @@ L40: at least zero. Unchanged on exit. - ALPHA - DOUBLE PRECISION. + ALPHA - COMPLEX . On entry, ALPHA specifies the scalar alpha. When alpha is zero then A is not referenced and B need not be set before entry. Unchanged on exit. - A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m + A - COMPLEX array of DIMENSION ( LDA, k ), where k is m when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. Before entry with UPLO = 'U' or 'u', the leading k by k upper triangular part of the array A must contain the upper @@ -3568,7 +5271,7 @@ L40: then LDA must be at least max( 1, n ). Unchanged on exit. - B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). + B - COMPLEX array of DIMENSION ( LDB, n ). Before entry, the leading m by n part of the array B must contain the right-hand side matrix B, and on exit is overwritten by the solution matrix X. @@ -3582,7 +5285,6 @@ L40: Level 3 Blas routine. - -- Written on 8-February-1989. Jack Dongarra, Argonne National Laboratory. Iain Duff, AERE Harwell. @@ -3608,6 +5310,7 @@ L40: } else { nrowa = *n; } + noconj = lsame_(transa, "T"); nounit = lsame_(diag, "N"); upper = lsame_(uplo, "U"); @@ -3632,7 +5335,7 @@ L40: info = 11; } if (info != 0) { - xerbla_("DTRSM ", &info); + xerbla_("CTRSM ", &info); return 0; } @@ -3644,12 +5347,13 @@ L40: /* And when alpha.eq.zero. */ - if (*alpha == 0.) 
{ + if (alpha->r == 0.f && alpha->i == 0.f) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = 0.; + i__3 = i__ + j * b_dim1; + b[i__3].r = 0.f, b[i__3].i = 0.f; /* L10: */ } /* L20: */ @@ -3667,23 +5371,39 @@ L40: if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.) { + if (alpha->r != 1.f || alpha->i != 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, q__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; /* L30: */ } } for (k = *m; k >= 1; --k) { - if (b[k + j * b_dim1] != 0.) { + i__2 = k + j * b_dim1; + if (b[i__2].r != 0.f || b[i__2].i != 0.f) { if (nounit) { - b[k + j * b_dim1] /= a[k + k * a_dim1]; + i__2 = k + j * b_dim1; + c_div(&q__1, &b[k + j * b_dim1], &a[k + k * + a_dim1]); + b[i__2].r = q__1.r, b[i__2].i = q__1.i; } i__2 = k - 1; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ - i__ + k * a_dim1]; + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * b_dim1; + i__6 = i__ + k * a_dim1; + q__2.r = b[i__5].r * a[i__6].r - b[i__5].i * + a[i__6].i, q__2.i = b[i__5].r * a[ + i__6].i + b[i__5].i * a[i__6].r; + q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] + .i - q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; /* L40: */ } } @@ -3694,24 +5414,40 @@ L40: } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.) 
{ + if (alpha->r != 1.f || alpha->i != 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, q__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; /* L70: */ } } i__2 = *m; for (k = 1; k <= i__2; ++k) { - if (b[k + j * b_dim1] != 0.) { + i__3 = k + j * b_dim1; + if (b[i__3].r != 0.f || b[i__3].i != 0.f) { if (nounit) { - b[k + j * b_dim1] /= a[k + k * a_dim1]; + i__3 = k + j * b_dim1; + c_div(&q__1, &b[k + j * b_dim1], &a[k + k * + a_dim1]); + b[i__3].r = q__1.r, b[i__3].i = q__1.i; } i__3 = *m; for (i__ = k + 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ - i__ + k * a_dim1]; + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * b_dim1; + i__7 = i__ + k * a_dim1; + q__2.r = b[i__6].r * a[i__7].r - b[i__6].i * + a[i__7].i, q__2.i = b[i__6].r * a[ + i__7].i + b[i__6].i * a[i__7].r; + q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] + .i - q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; /* L80: */ } } @@ -3722,44 +5458,113 @@ L40: } } else { -/* Form B := alpha*inv( A' )*B. */ +/* + Form B := alpha*inv( A' )*B + or B := alpha*inv( conjg( A' ) )*B. 
+*/ if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - temp = *alpha * b[i__ + j * b_dim1]; - i__3 = i__ - 1; - for (k = 1; k <= i__3; ++k) { - temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; + i__3 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, + q__1.i = alpha->r * b[i__3].i + alpha->i * b[ + i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + if (noconj) { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + i__4 = k + i__ * a_dim1; + i__5 = k + j * b_dim1; + q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * + b[i__5].i, q__2.i = a[i__4].r * b[ + i__5].i + a[i__4].i * b[i__5].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; /* L110: */ - } - if (nounit) { - temp /= a[i__ + i__ * a_dim1]; - } - b[i__ + j * b_dim1] = temp; + } + if (nounit) { + c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + r_cnjg(&q__3, &a[k + i__ * a_dim1]); + i__4 = k + j * b_dim1; + q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4] + .i, q__2.i = q__3.r * b[i__4].i + + q__3.i * b[i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; /* L120: */ - } + } + if (nounit) { + r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__3 = i__ + j * b_dim1; + b[i__3].r = temp.r, b[i__3].i = temp.i; /* L130: */ + } +/* L140: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { for (i__ = *m; i__ >= 1; --i__) { - temp = *alpha * b[i__ + j * b_dim1]; - i__2 = *m; - for (k = i__ + 1; k <= i__2; ++k) { - temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; -/* L140: */ - } - if (nounit) { - temp /= a[i__ + i__ * a_dim1]; - } - b[i__ + j * b_dim1] = temp; + i__2 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, + q__1.i = alpha->r * b[i__2].i + alpha->i * b[ + i__2].r; 
+ temp.r = q__1.r, temp.i = q__1.i; + if (noconj) { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + i__3 = k + i__ * a_dim1; + i__4 = k + j * b_dim1; + q__2.r = a[i__3].r * b[i__4].r - a[i__3].i * + b[i__4].i, q__2.i = a[i__3].r * b[ + i__4].i + a[i__3].i * b[i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; /* L150: */ - } + } + if (nounit) { + c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + r_cnjg(&q__3, &a[k + i__ * a_dim1]); + i__3 = k + j * b_dim1; + q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3] + .i, q__2.i = q__3.r * b[i__3].i + + q__3.i * b[i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; /* L160: */ + } + if (nounit) { + r_cnjg(&q__2, &a[i__ + i__ * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__2 = i__ + j * b_dim1; + b[i__2].r = temp.r, b[i__2].i = temp.i; +/* L170: */ + } +/* L180: */ } } } @@ -3771,139 +5576,240 @@ L40: if (upper) { i__1 = *n; for (j = 1; j <= i__1; ++j) { - if (*alpha != 1.) { + if (alpha->r != 1.f || alpha->i != 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L170: */ + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, q__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L190: */ } } i__2 = j - 1; for (k = 1; k <= i__2; ++k) { - if (a[k + j * a_dim1] != 0.) 
{ + i__3 = k + j * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ - i__ + k * b_dim1]; -/* L180: */ + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = k + j * a_dim1; + i__7 = i__ + k * b_dim1; + q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * + b[i__7].i, q__2.i = a[i__6].r * b[ + i__7].i + a[i__6].i * b[i__7].r; + q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] + .i - q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L200: */ } } -/* L190: */ +/* L210: */ } if (nounit) { - temp = 1. / a[j + j * a_dim1]; + c_div(&q__1, &c_b21, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L200: */ + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + q__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L220: */ } } -/* L210: */ +/* L230: */ } } else { for (j = *n; j >= 1; --j) { - if (*alpha != 1.) { + if (alpha->r != 1.f || alpha->i != 0.f) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] - ; -/* L220: */ + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, q__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L240: */ } } i__1 = *n; for (k = j + 1; k <= i__1; ++k) { - if (a[k + j * a_dim1] != 0.) 
{ + i__2 = k + j * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ - i__ + k * b_dim1]; -/* L230: */ + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = k + j * a_dim1; + i__6 = i__ + k * b_dim1; + q__2.r = a[i__5].r * b[i__6].r - a[i__5].i * + b[i__6].i, q__2.i = a[i__5].r * b[ + i__6].i + a[i__5].i * b[i__6].r; + q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] + .i - q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L250: */ } } -/* L240: */ +/* L260: */ } if (nounit) { - temp = 1. / a[j + j * a_dim1]; + c_div(&q__1, &c_b21, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; -/* L250: */ + i__2 = i__ + j * b_dim1; + i__3 = i__ + j * b_dim1; + q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + q__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L270: */ } } -/* L260: */ +/* L280: */ } } } else { -/* Form B := alpha*B*inv( A' ). */ +/* + Form B := alpha*B*inv( A' ) + or B := alpha*B*inv( conjg( A' ) ). +*/ if (upper) { for (k = *n; k >= 1; --k) { if (nounit) { - temp = 1. / a[k + k * a_dim1]; + if (noconj) { + c_div(&q__1, &c_b21, &a[k + k * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[k + k * a_dim1]); + c_div(&q__1, &c_b21, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L270: */ + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, + q__1.i = temp.r * b[i__3].i + temp.i * b[ + i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L290: */ } } i__1 = k - 1; for (j = 1; j <= i__1; ++j) { - if (a[j + k * a_dim1] != 0.) 
{ - temp = a[j + k * a_dim1]; + i__2 = j + k * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + if (noconj) { + i__2 = j + k * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + } else { + r_cnjg(&q__1, &a[j + k * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] -= temp * b[i__ + k * - b_dim1]; -/* L280: */ + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * b_dim1; + i__5 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__5].r - temp.i * b[i__5] + .i, q__2.i = temp.r * b[i__5].i + + temp.i * b[i__5].r; + q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4] + .i - q__2.i; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L300: */ } } -/* L290: */ +/* L310: */ } - if (*alpha != 1.) { + if (alpha->r != 1.f || alpha->i != 0.f) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { - b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] - ; -/* L300: */ + i__2 = i__ + k * b_dim1; + i__3 = i__ + k * b_dim1; + q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3] + .i, q__1.i = alpha->r * b[i__3].i + + alpha->i * b[i__3].r; + b[i__2].r = q__1.r, b[i__2].i = q__1.i; +/* L320: */ } } -/* L310: */ +/* L330: */ } } else { i__1 = *n; for (k = 1; k <= i__1; ++k) { if (nounit) { - temp = 1. / a[k + k * a_dim1]; + if (noconj) { + c_div(&q__1, &c_b21, &a[k + k * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } else { + r_cnjg(&q__2, &a[k + k * a_dim1]); + c_div(&q__1, &c_b21, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; -/* L320: */ + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, + q__1.i = temp.r * b[i__4].i + temp.i * b[ + i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L340: */ } } i__2 = *n; for (j = k + 1; j <= i__2; ++j) { - if (a[j + k * a_dim1] != 0.) 
{ - temp = a[j + k * a_dim1]; + i__3 = j + k * a_dim1; + if (a[i__3].r != 0.f || a[i__3].i != 0.f) { + if (noconj) { + i__3 = j + k * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + } else { + r_cnjg(&q__1, &a[j + k * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { - b[i__ + j * b_dim1] -= temp * b[i__ + k * - b_dim1]; -/* L330: */ + i__4 = i__ + j * b_dim1; + i__5 = i__ + j * b_dim1; + i__6 = i__ + k * b_dim1; + q__2.r = temp.r * b[i__6].r - temp.i * b[i__6] + .i, q__2.i = temp.r * b[i__6].i + + temp.i * b[i__6].r; + q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5] + .i - q__2.i; + b[i__4].r = q__1.r, b[i__4].i = q__1.i; +/* L350: */ } } -/* L340: */ +/* L360: */ } - if (*alpha != 1.) { + if (alpha->r != 1.f || alpha->i != 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] - ; -/* L350: */ + i__3 = i__ + k * b_dim1; + i__4 = i__ + k * b_dim1; + q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4] + .i, q__1.i = alpha->r * b[i__4].i + + alpha->i * b[i__4].r; + b[i__3].r = q__1.r, b[i__3].i = q__1.i; +/* L370: */ } } -/* L360: */ +/* L380: */ } } } @@ -3911,400 +5817,8964 @@ L40: return 0; -/* End of DTRSM . */ +/* End of CTRSM . 
*/ -} /* dtrsm_ */ +} /* ctrsm_ */ -doublereal dzasum_(integer *n, doublecomplex *zx, integer *incx) +/* Subroutine */ int ctrsv_(char *uplo, char *trans, char *diag, integer *n, + complex *a, integer *lda, complex *x, integer *incx) { /* System generated locals */ - integer i__1; - doublereal ret_val; + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *); /* Local variables */ - static integer i__, ix; - static doublereal stemp; - extern doublereal dcabs1_(doublecomplex *); + static integer i__, j, ix, jx, kx, info; + static complex temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconj, nounit; /* - takes the sum of the absolute values. - jack dongarra, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ + Purpose + ======= + CTRSV solves one of the systems of equations - /* Parameter adjustments */ - --zx; + A*x = b, or A'*x = b, or conjg( A' )*x = b, - /* Function Body */ - ret_val = 0.; - stemp = 0.; - if (*n <= 0 || *incx <= 0) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } + where b and x are n element vectors and A is an n by n unit, or + non-unit, upper or lower triangular matrix. -/* code for increment not equal to 1 */ + No test for singularity or near-singularity is included in this + routine. Such tests must be performed before calling this routine. - ix = 1; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp += dcabs1_(&zx[ix]); - ix += *incx; -/* L10: */ - } - ret_val = stemp; - return ret_val; + Parameters + ========== -/* code for increment equal to 1 */ + UPLO - CHARACTER*1. 
+ On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - stemp += dcabs1_(&zx[i__]); -/* L30: */ - } - ret_val = stemp; - return ret_val; -} /* dzasum_ */ + UPLO = 'U' or 'u' A is an upper triangular matrix. -doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx) -{ - /* System generated locals */ - integer i__1, i__2, i__3; - doublereal ret_val, d__1; + UPLO = 'L' or 'l' A is a lower triangular matrix. - /* Builtin functions */ - double d_imag(doublecomplex *), sqrt(doublereal); + Unchanged on exit. - /* Local variables */ - static integer ix; - static doublereal ssq, temp, norm, scale; + TRANS - CHARACTER*1. + On entry, TRANS specifies the equations to be solved as + follows: + TRANS = 'N' or 'n' A*x = b. -/* - DZNRM2 returns the euclidean norm of a vector via the function - name, so that + TRANS = 'T' or 't' A'*x = b. - DZNRM2 := sqrt( conjg( x' )*x ) + TRANS = 'C' or 'c' conjg( A' )*x = b. + Unchanged on exit. - -- This version written on 25-October-1982. - Modified on 14-October-1993 to inline the call to ZLASSQ. - Sven Hammarling, Nag Ltd. -*/ + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + DIAG = 'U' or 'u' A is assumed to be unit triangular. - /* Parameter adjustments */ - --x; + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. - /* Function Body */ - if (*n < 1 || *incx < 1) { - norm = 0.; - } else { - scale = 0.; - ssq = 1.; -/* - The following loop is equivalent to this call to the LAPACK - auxiliary routine: - CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) -*/ + Unchanged on exit. - i__1 = (*n - 1) * *incx + 1; - i__2 = *incx; - for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - i__3 = ix; - if (x[i__3].r != 0.) 
{ - i__3 = ix; - temp = (d__1 = x[i__3].r, abs(d__1)); - if (scale < temp) { -/* Computing 2nd power */ - d__1 = scale / temp; - ssq = ssq * (d__1 * d__1) + 1.; - scale = temp; - } else { -/* Computing 2nd power */ - d__1 = temp / scale; - ssq += d__1 * d__1; - } - } - if (d_imag(&x[ix]) != 0.) { - temp = (d__1 = d_imag(&x[ix]), abs(d__1)); - if (scale < temp) { -/* Computing 2nd power */ - d__1 = scale / temp; - ssq = ssq * (d__1 * d__1) + 1.; - scale = temp; - } else { -/* Computing 2nd power */ - d__1 = temp / scale; - ssq += d__1 * d__1; - } - } -/* L10: */ - } - norm = scale * sqrt(ssq); - } + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. - ret_val = norm; - return ret_val; + A - COMPLEX array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. -/* End of DZNRM2. */ + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. -} /* dznrm2_ */ + X - COMPLEX array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element right-hand side vector b. On exit, X is overwritten + with the solution vector x. -integer idamax_(integer *n, doublereal *dx, integer *incx) -{ - /* System generated locals */ - integer ret_val, i__1; - doublereal d__1; + INCX - INTEGER. 
+ On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. - /* Local variables */ - static integer i__, ix; - static doublereal dmax__; + Level 2 Blas routine. -/* - finds the index of element having max. absolute value. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) -*/ + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + Test the input parameters. +*/ + /* Parameter adjustments */ - --dx; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; /* Function Body */ - ret_val = 0; - if (*n < 1 || *incx <= 0) { - return ret_val; - } - ret_val = 1; - if (*n == 1) { - return ret_val; - } - if (*incx == 1) { - goto L20; + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; } - -/* code for increment not equal to 1 */ - - ix = 1; - dmax__ = abs(dx[1]); - ix += *incx; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if ((d__1 = dx[ix], abs(d__1)) <= dmax__) { - goto L5; - } - ret_val = i__; - dmax__ = (d__1 = dx[ix], abs(d__1)); -L5: - ix += *incx; -/* L10: */ + if (info != 0) { + xerbla_("CTRSV ", &info); + return 0; } - return ret_val; -/* code for increment equal to 1 */ +/* Quick return if possible. 
*/ -L20: - dmax__ = abs(dx[1]); - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if ((d__1 = dx[i__], abs(d__1)) <= dmax__) { - goto L30; - } - ret_val = i__; - dmax__ = (d__1 = dx[i__], abs(d__1)); -L30: - ; + if (*n == 0) { + return 0; } - return ret_val; -} /* idamax_ */ - -integer izamax_(integer *n, doublecomplex *zx, integer *incx) -{ - /* System generated locals */ - integer ret_val, i__1; - - /* Local variables */ - static integer i__, ix; - static doublereal smax; - extern doublereal dcabs1_(doublecomplex *); + noconj = lsame_(trans, "T"); + nounit = lsame_(diag, "N"); /* - finds the index of element having max. absolute value. - jack dongarra, 1/15/85. - modified 3/93 to return if incx .le. 0. - modified 12/3/93, array(1) declarations changed to array(*) + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. */ + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } - /* Parameter adjustments */ - --zx; +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ - /* Function Body */ - ret_val = 0; - if (*n < 1 || *incx <= 0) { - return ret_val; - } - ret_val = 1; - if (*n == 1) { - return ret_val; - } - if (*incx == 1) { - goto L20; - } + if (lsame_(trans, "N")) { -/* code for increment not equal to 1 */ +/* Form x := inv( A )*x. 
*/ - ix = 1; - smax = dcabs1_(&zx[1]); - ix += *incx; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if (dcabs1_(&zx[ix]) <= smax) { - goto L5; - } - ret_val = i__; - smax = dcabs1_(&zx[ix]); -L5: - ix += *incx; + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + if (nounit) { + i__1 = j; + c_div(&q__1, &x[j], &a[j + j * a_dim1]); + x[i__1].r = q__1.r, x[i__1].i = q__1.i; + } + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + for (i__ = j - 1; i__ >= 1; --i__) { + i__1 = i__; + i__2 = i__; + i__3 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + q__2.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - + q__2.i; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; /* L10: */ - } - return ret_val; + } + } +/* L20: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + i__1 = jx; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + if (nounit) { + i__1 = jx; + c_div(&q__1, &x[jx], &a[j + j * a_dim1]); + x[i__1].r = q__1.r, x[i__1].i = q__1.i; + } + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + ix = jx; + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + i__1 = ix; + i__2 = ix; + i__3 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + q__2.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - + q__2.i; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; +/* L30: */ + } + } + jx -= *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + if (nounit) { + i__2 = j; + c_div(&q__1, &x[j], &a[j + j * a_dim1]); + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + } + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = i__ + j * a_dim1; + q__2.r = 
temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - + q__2.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; +/* L50: */ + } + } +/* L60: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + if (nounit) { + i__2 = jx; + c_div(&q__1, &x[jx], &a[j + j * a_dim1]); + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + } + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + ix = jx; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + i__3 = ix; + i__4 = ix; + i__5 = i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - + q__2.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } + } else { -/* code for increment equal to 1 */ +/* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. 
*/ -L20: - smax = dcabs1_(&zx[1]); - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - if (dcabs1_(&zx[i__]) <= smax) { - goto L30; + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, q__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L90: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[ + i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L100: */ + } + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__2 = j; + x[i__2].r = temp.r, x[i__2].i = temp.i; +/* L110: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ix = kx; + i__2 = jx; + temp.r = x[i__2].r, temp.i = x[i__2].i; + if (noconj) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = ix; + q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[ + i__4].i, q__2.i = a[i__3].r * x[i__4].i + + a[i__3].i * x[i__4].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L120: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * 
x[i__3].i, + q__2.i = q__3.r * x[i__3].i + q__3.i * x[ + i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L130: */ + } + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__2 = jx; + x[i__2].r = temp.r, x[i__2].i = temp.i; + jx += *incx; +/* L140: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__; + q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ + i__3].i, q__2.i = a[i__2].r * x[i__3].i + + a[i__2].i * x[i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L150: */ + } + if (nounit) { + c_div(&q__1, &temp, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__2 = i__; + q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, + q__2.i = q__3.r * x[i__2].i + q__3.i * x[ + i__2].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L160: */ + } + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__1 = j; + x[i__1].r = temp.r, x[i__1].i = temp.i; +/* L170: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + ix = kx; + i__1 = jx; + temp.r = x[i__1].r, temp.i = x[i__1].i; + if (noconj) { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = ix; + q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[ + i__3].i, q__2.i = a[i__2].r * x[i__3].i + + a[i__2].i * x[i__3].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix -= *incx; +/* L180: */ + } + if 
(nounit) { + c_div(&q__1, &temp, &a[j + j * a_dim1]); + temp.r = q__1.r, temp.i = q__1.i; + } + } else { + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + r_cnjg(&q__3, &a[i__ + j * a_dim1]); + i__2 = ix; + q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, + q__2.i = q__3.r * x[i__2].i + q__3.i * x[ + i__2].r; + q__1.r = temp.r - q__2.r, q__1.i = temp.i - + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix -= *incx; +/* L190: */ + } + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + c_div(&q__1, &temp, &q__2); + temp.r = q__1.r, temp.i = q__1.i; + } + } + i__1 = jx; + x[i__1].r = temp.r, x[i__1].i = temp.i; + jx -= *incx; +/* L200: */ + } + } } - ret_val = i__; - smax = dcabs1_(&zx[i__]); -L30: - ; } - return ret_val; -} /* izamax_ */ -logical lsame_(char *ca, char *cb) + return 0; + +/* End of CTRSV . */ + +} /* ctrsv_ */ + +/* Subroutine */ int daxpy_(integer *n, doublereal *da, doublereal *dx, + integer *incx, doublereal *dy, integer *incy) { /* System generated locals */ - logical ret_val; + integer i__1; /* Local variables */ - static integer inta, intb, zcode; + static integer i__, m, ix, iy, mp1; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= + constant times a vector plus a vector. + uses unrolled loops for increments equal to one. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ - LSAME returns .TRUE. if CA is the same letter as CB regardless of - case. - Arguments - ========= + /* Parameter adjustments */ + --dy; + --dx; - CA (input) CHARACTER*1 - CB (input) CHARACTER*1 - CA and CB specify the single characters to be compared. + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*da == 0.) 
{ + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } - ===================================================================== +/* + code for unequal increments or equal increments + not equal to 1 +*/ + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[iy] += *da * dx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; - Test if the characters are equal +/* + code for both increments equal to 1 + + + clean-up loop */ - ret_val = *(unsigned char *)ca == *(unsigned char *)cb; - if (ret_val) { - return ret_val; +L20: + m = *n % 4; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[i__] += *da * dx[i__]; +/* L30: */ + } + if (*n < 4) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 4) { + dy[i__] += *da * dx[i__]; + dy[i__ + 1] += *da * dx[i__ + 1]; + dy[i__ + 2] += *da * dx[i__ + 2]; + dy[i__ + 3] += *da * dx[i__ + 3]; +/* L50: */ } + return 0; +} /* daxpy_ */ -/* Now test for equivalence if both characters are alphabetic. */ +doublereal dcabs1_(doublecomplex *z__) +{ + /* System generated locals */ + doublereal ret_val; + static doublecomplex equiv_0[1]; + + /* Local variables */ +#define t ((doublereal *)equiv_0) +#define zz (equiv_0) + + zz->r = z__->r, zz->i = z__->i; + ret_val = abs(t[0]) + abs(t[1]); + return ret_val; +} /* dcabs1_ */ + +#undef zz +#undef t + + +/* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; - zcode = 'Z'; /* - Use 'Z' rather than 'A' so that ASCII can be detected on Prime - machines, on which ICHAR returns a value with bit 8 set. - ICHAR('A') on Prime machines returns 193 which is the same as - ICHAR('A') on an EBCDIC machine. 
+ copies a vector, x, to a vector, y. + uses unrolled loops for increments equal to one. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) */ - inta = *(unsigned char *)ca; - intb = *(unsigned char *)cb; - if (zcode == 90 || zcode == 122) { + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } /* - ASCII is assumed - ZCODE is the ASCII code of either lower or - upper case 'Z'. + code for unequal increments or equal increments + not equal to 1 */ - if (inta >= 97 && inta <= 122) { - inta += -32; - } - if (intb >= 97 && intb <= 122) { - intb += -32; - } - - } else if (zcode == 233 || zcode == 169) { + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[iy] = dx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; /* - EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or - upper case 'Z'. 
+ code for both increments equal to 1 + + + clean-up loop */ - if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta - >= 162 && inta <= 169) { - inta += 64; - } - if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb - >= 162 && intb <= 169) { - intb += 64; - } +L20: + m = *n % 7; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dy[i__] = dx[i__]; +/* L30: */ + } + if (*n < 7) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 7) { + dy[i__] = dx[i__]; + dy[i__ + 1] = dx[i__ + 1]; + dy[i__ + 2] = dx[i__ + 2]; + dy[i__ + 3] = dx[i__ + 3]; + dy[i__ + 4] = dx[i__ + 4]; + dy[i__ + 5] = dx[i__ + 5]; + dy[i__ + 6] = dx[i__ + 6]; +/* L50: */ + } + return 0; +} /* dcopy_ */ + +doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, + integer *incy) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static doublereal dtemp; - } else if (zcode == 218 || zcode == 250) { /* - ASCII is assumed, on Prime machines - ZCODE is the ASCII code - plus 128 of either lower or upper case 'Z'. + forms the dot product of two vectors. + uses unrolled loops for increments equal to one. + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) */ - if (inta >= 225 && inta <= 250) { - inta += -32; - } - if (intb >= 225 && intb <= 250) { - intb += -32; - } + + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + ret_val = 0.; + dtemp = 0.; + if (*n <= 0) { + return ret_val; + } + if (*incx == 1 && *incy == 1) { + goto L20; } - ret_val = inta == intb; /* - RETURN - - End of LSAME + code for unequal increments or equal increments + not equal to 1 */ - return ret_val; -} /* lsame_ */ + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp += dx[ix] * dy[iy]; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val = dtemp; + return ret_val; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp += dx[i__] * dy[i__]; +/* L30: */ + } + if (*n < 5) { + goto L60; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 5) { + dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[ + i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ + + 4] * dy[i__ + 4]; +/* L50: */ + } +L60: + ret_val = dtemp; + return ret_val; +} /* ddot_ */ + +/* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, doublereal *alpha, doublereal *a, integer *lda, + doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, + integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static logical nota, notb; + static doublereal temp; + static integer ncola; + extern logical lsame_(char *, char *); + static integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + 
Purpose + ======= + + DGEMM performs one of the matrix-matrix operations + + C := alpha*op( A )*op( B ) + beta*C, + + where op( X ) is one of + + op( X ) = X or op( X ) = X', + + alpha and beta are scalars, and A, B and C are matrices, with op( A ) + an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + Parameters + ========== + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n', op( A ) = A. + + TRANSA = 'T' or 't', op( A ) = A'. + + TRANSA = 'C' or 'c', op( A ) = A'. + + Unchanged on exit. + + TRANSB - CHARACTER*1. + On entry, TRANSB specifies the form of op( B ) to be used in + the matrix multiplication as follows: + + TRANSB = 'N' or 'n', op( B ) = B. + + TRANSB = 'T' or 't', op( B ) = B'. + + TRANSB = 'C' or 'c', op( B ) = B'. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix + op( A ) and of the matrix C. M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix + op( B ) and the number of columns of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry, K specifies the number of columns of the matrix + op( A ) and the number of rows of the matrix op( B ). K must + be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is + k when TRANSA = 'N' or 'n', and is m otherwise. + Before entry with TRANSA = 'N' or 'n', the leading m by k + part of the array A must contain the matrix A, otherwise + the leading k by m part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. 
When TRANSA = 'N' or 'n' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, k ). + Unchanged on exit. + + B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is + n when TRANSB = 'N' or 'n', and is k otherwise. + Before entry with TRANSB = 'N' or 'n', the leading k by n + part of the array B must contain the matrix B, otherwise + the leading n by k part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANSB = 'N' or 'n' then + LDB must be at least max( 1, k ), otherwise LDB must be at + least max( 1, n ). + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n matrix + ( alpha*op( A )*op( B ) + beta*C ). + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Set NOTA and NOTB as true if A and B respectively are not + transposed and set NROWA, NCOLA and NROWB as the number of rows + and columns of A and the number of rows of B respectively. 
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, "N"); + notb = lsame_(transb, "N"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! nota && ! lsame_(transa, "C") && ! lsame_( + transa, "T")) { + info = 1; + } else if (! notb && ! lsame_(transb, "C") && ! + lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_("DGEMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And if alpha.eq.zero. */ + + if (*alpha == 0.) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } + } else if (*beta != 1.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[l + j * b_dim1] != 0.) { + temp = *alpha * b[l + j * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; +/* L100: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L110: */ + } +/* L120: */ + } + } + } else { + if (nota) { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L130: */ + } + } else if (*beta != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L140: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[j + l * b_dim1] != 0.) { + temp = *alpha * b[j + l * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L150: */ + } + } +/* L160: */ + } +/* L170: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; +/* L180: */ + } + if (*beta == 0.) 
{ + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L190: */ + } +/* L200: */ + } + } + } + + return 0; + +/* End of DGEMM . */ + +} /* dgemm_ */ + +/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal * + alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublereal temp; + static integer lenx, leny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DGEMV performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + Parameters + ========== + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' y := alpha*A*x + beta*y. + + TRANS = 'T' or 't' y := alpha*A'*x + beta*y. + + TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. 
+ + X - DOUBLE PRECISION array of DIMENSION at least + ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + Before entry, the incremented array X must contain the + vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - DOUBLE PRECISION array of DIMENSION at least + ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. + Before entry with BETA non-zero, the incremented array Y + must contain the vector y. On exit, Y is overwritten by the + updated vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("DGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* + Set LENX and LENY, the lengths of the vectors x and y, and set + up the start points in X and Y. 
+*/ + + if (lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. + + First form y := beta*y. +*/ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + if (lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = *alpha * x[jx]; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp * a[i__ + j * a_dim1]; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y. 
*/ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L100: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of DGEMV . */ + +} /* dgemv_ */ + +/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static doublereal temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DGER performs the rank 1 operation + + A := alpha*x*y' + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Parameters + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - DOUBLE PRECISION array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). 
+ Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("DGER ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.) { + temp = *alpha * y[jy]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.) 
{ + temp = *alpha * y[jy]; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of DGER . */ + +} /* dger_ */ + +doublereal dnrm2_(integer *n, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal ret_val, d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer ix; + static doublereal ssq, norm, scale, absxi; + + +/* + DNRM2 returns the euclidean norm of a vector via the function + name, so that + + DNRM2 := sqrt( x'*x ) + + + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to DLASSQ. + Sven Hammarling, Nag Ltd. +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.; + } else if (*n == 1) { + norm = abs(x[1]); + } else { + scale = 0.; + ssq = 1.; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL DLASSQ( N, X, INCX, SCALE, SSQ ) +*/ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.) { + absxi = (d__1 = x[ix], abs(d__1)); + if (scale < absxi) { +/* Computing 2nd power */ + d__1 = scale / absxi; + ssq = ssq * (d__1 * d__1) + 1.; + scale = absxi; + } else { +/* Computing 2nd power */ + d__1 = absxi / scale; + ssq += d__1 * d__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of DNRM2. */ + +} /* dnrm2_ */ + +/* Subroutine */ int drot_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy, doublereal *c__, doublereal *s) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, ix, iy; + static doublereal dtemp; + + +/* + applies a plane rotation. + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = *c__ * dx[ix] + *s * dy[iy]; + dy[iy] = *c__ * dy[iy] - *s * dx[ix]; + dx[ix] = dtemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = *c__ * dx[i__] + *s * dy[i__]; + dy[i__] = *c__ * dy[i__] - *s * dx[i__]; + dx[i__] = dtemp; +/* L30: */ + } + return 0; +} /* drot_ */ + +/* Subroutine */ int dscal_(integer *n, doublereal *da, doublereal *dx, + integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, m, mp1, nincx; + + +/* + scales a vector by a constant. + uses unrolled loops for increment equal to one. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --dx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + dx[i__] = *da * dx[i__]; +/* L10: */ + } + return 0; + +/* + code for increment equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__2 = m; + for (i__ = 1; i__ <= i__2; ++i__) { + dx[i__] = *da * dx[i__]; +/* L30: */ + } + if (*n < 5) { + return 0; + } +L40: + mp1 = m + 1; + i__2 = *n; + for (i__ = mp1; i__ <= i__2; i__ += 5) { + dx[i__] = *da * dx[i__]; + dx[i__ + 1] = *da * dx[i__ + 1]; + dx[i__ + 2] = *da * dx[i__ + 2]; + dx[i__ + 3] = *da * dx[i__ + 3]; + dx[i__ + 4] = *da * dx[i__ + 4]; +/* L50: */ + } + return 0; +} /* dscal_ */ + +/* Subroutine */ int dswap_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static doublereal dtemp; + + +/* + interchanges two vectors. + uses unrolled loops for increments equal one. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --dy; + --dx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[ix]; + dx[ix] = dy[iy]; + dy[iy] = dtemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 3; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = dtemp; +/* L30: */ + } + if (*n < 3) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 3) { + dtemp = dx[i__]; + dx[i__] = dy[i__]; + dy[i__] = 
dtemp; + dtemp = dx[i__ + 1]; + dx[i__ + 1] = dy[i__ + 1]; + dy[i__ + 1] = dtemp; + dtemp = dx[i__ + 2]; + dx[i__ + 2] = dy[i__ + 2]; + dy[i__ + 2] = dtemp; +/* L50: */ + } + return 0; +} /* dswap_ */ + +/* Subroutine */ int dsymv_(char *uplo, integer *n, doublereal *alpha, + doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal + *beta, doublereal *y, integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublereal temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DSYMV performs the matrix-vector operation + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n symmetric matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. + Unchanged on exit. 
+ + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. On exit, Y is overwritten by the updated + vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_("DSYMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0. && *beta == 1.) { + return 0; + } + +/* Set up the start points in X and Y. 
*/ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. + + First form y := beta*y. +*/ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + if (lsame_(uplo, "U")) { + +/* Form y when A is stored in upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L50: */ + } + y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } else { + +/* Form y when A is stored in lower triangle. 
*/ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + y[j] += temp1 * a[j + j * a_dim1]; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[j] += *alpha * temp2; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + y[jy] += temp1 * a[j + j * a_dim1]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of DSYMV . */ + +} /* dsymv_ */ + +/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha, + doublereal *x, integer *incx, doublereal *y, integer *incy, + doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static doublereal temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DSYR2 performs the symmetric rank 2 operation + + A := alpha*x*y' + alpha*y*x' + A, + + where alpha is a scalar, x and y are n element vectors and A is an n + by n symmetric matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. 
+ + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*n)) { + info = 9; + } + if (info != 0) { + xerbla_("DSYR2 ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.) { + return 0; + } + +/* + Set up the start points in X and Y if the increments are not both + unity. +*/ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. +*/ + + if (lsame_(uplo, "U")) { + +/* Form A when A is stored in the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0. || y[j] != 0.) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L10: */ + } + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0. || y[jy] != 0.) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = kx; + iy = ky; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L30: */ + } + } + jx += *incx; + jy += *incy; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0. || y[j] != 0.) 
{ + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L50: */ + } + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0. || y[jy] != 0.) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L70: */ + } + } + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } + + return 0; + +/* End of DSYR2 . */ + +} /* dsyr2_ */ + +/* Subroutine */ int dsyr2k_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *b, + integer *ldb, doublereal *beta, doublereal *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static doublereal temp1, temp2; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DSYR2K performs one of the symmetric rank 2k operations + + C := alpha*A*B' + alpha*B*A' + beta*C, + + or + + C := alpha*A'*B + alpha*B'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A and B are n by k matrices in the first case and k by n + matrices in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. 
+ On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrices A and B, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrices A and B. K must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array B must contain the matrix B, otherwise + the leading k by n part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDB must be at least max( 1, n ), otherwise LDB must + be at least max( 1, k ). + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. 
+ Unchanged on exit. + + C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! 
lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,nrowa)) { + info = 9; + } else if (*ldc < max(1,*n)) { + info = 12; + } + if (info != 0) { + xerbla_("DSYR2K", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*B' + alpha*B*A' + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) 
{ + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*B + alpha*B'*A + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1 = 0.; + temp2 = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp1 = 0.; + temp2 = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.) 
{ + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of DSYR2K. */ + +} /* dsyr2k_ */ + +/* Subroutine */ int dsyrk_(char *uplo, char *trans, integer *n, integer *k, + doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, + doublereal *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, info; + static doublereal temp; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + DSYRK performs one of the symmetric rank k operations + + C := alpha*A*A' + beta*C, + + or + + C := alpha*A'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A is an n by k matrix in the first case and a k by n matrix + in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*A + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. 
+ On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrix A, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrix A. K must be at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + BETA - DOUBLE PRECISION. + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. 
+ Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + xerbla_("DSYRK ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + if (upper) { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*A' + beta*C. 
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L90: */ + } + } else if (*beta != 1.) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.; +/* L140: */ + } + } else if (*beta != 1.) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*A + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp = 0.; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.) 
{ + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of DSYRK . */ + +} /* dsyrk_ */ + +/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static doublereal temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + DTRMM performs one of the matrix-matrix operations + + B := alpha*op( A )*B, or B := alpha*B*op( A ), + + where alpha is a scalar, B is an m by n matrix, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) multiplies B from + the left or right as follows: + + SIDE = 'L' or 'l' B := alpha*op( A )*B. + + SIDE = 'R' or 'r' B := alpha*B*op( A ). + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. 
+ On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the matrix B, and on exit is overwritten by the + transformed matrix. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. 
+ + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("DTRMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*A*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.) 
{ + temp = *alpha * b[k + j * b_dim1]; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L30: */ + } + if (nounit) { + temp *= a[k + k * a_dim1]; + } + b[k + j * b_dim1] = temp; + } +/* L40: */ + } +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.) { + temp = *alpha * b[k + j * b_dim1]; + b[k + j * b_dim1] = temp; + if (nounit) { + b[k + j * b_dim1] *= a[k + k * a_dim1]; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A'*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L90: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L100: */ + } +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L120: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L130: */ + } +/* L140: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*A. */ + + if (upper) { + for (j = *n; j >= 1; --j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L150: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.) 
{ + temp = *alpha * a[k + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L190: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.) { + temp = *alpha * a[k + j * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L200: */ + } + } +/* L210: */ + } +/* L220: */ + } + } + } else { + +/* Form B := alpha*B*A'. */ + + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = *alpha * a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } else { + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = *alpha * a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L270: */ + } + } +/* L280: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L290: */ + } + } +/* L300: */ + } + } + } + } + + return 0; + +/* End of DTRMM . 
*/ + +} /* dtrmm_ */ + +/* Subroutine */ int dtrmv_(char *uplo, char *trans, char *diag, integer *n, + doublereal *a, integer *lda, doublereal *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static doublereal temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + DTRMV performs one of the matrix-vector operations + + x := A*x, or x := A'*x, + + where x is an n element vector and A is an n by n unit, or non-unit, + upper or lower triangular matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' x := A*x. + + TRANS = 'T' or 't' x := A'*x. + + TRANS = 'C' or 'c' x := A'*x. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. 
+ Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - DOUBLE PRECISION array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. On exit, X is overwritten with the + tranformed vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("DTRMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. 
+*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := A*x. */ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = x[j]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L10: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix += *incx; +/* L30: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.) { + temp = x[j]; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix -= *incx; +/* L70: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x. 
*/ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + x[j] = temp; +/* L100: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + x[jx] = temp; + jx -= *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + x[j] = temp; +/* L140: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L150: */ + } + x[jx] = temp; + jx += *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of DTRMV . 
*/ + +} /* dtrmv_ */ + +/* Subroutine */ int dtrsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, doublereal *alpha, doublereal *a, integer * + lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static doublereal temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + DTRSM solves one of the matrix equations + + op( A )*X = alpha*B, or X*op( A ) = alpha*B, + + where alpha is a scalar, X and B are m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. + + The matrix X is overwritten on B. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) appears on the left + or right of X as follows: + + SIDE = 'L' or 'l' op( A )*X = alpha*B. + + SIDE = 'R' or 'r' X*op( A ) = alpha*B. + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. 
+ On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - DOUBLE PRECISION. + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the right-hand side matrix B, and on exit is + overwritten by the solution matrix X. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("DTRSM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form B := alpha*inv( A' )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L110: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L140: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L170: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L180: */ + } + } +/* L190: */ + } + if (nounit) { + temp = 1. 
/ a[j + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L200: */ + } + } +/* L210: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (*alpha != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L220: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + if (nounit) { + temp = 1. / a[j + j * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } + } else { + +/* Form B := alpha*B*inv( A' ). */ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + temp = 1. / a[k + k * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L270: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L280: */ + } + } +/* L290: */ + } + if (*alpha != 1.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L300: */ + } + } +/* L310: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + temp = 1. / a[k + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L320: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.) { + temp = a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L330: */ + } + } +/* L340: */ + } + if (*alpha != 1.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L350: */ + } + } +/* L360: */ + } + } + } + } + + return 0; + +/* End of DTRSM . */ + +} /* dtrsm_ */ + +doublereal dzasum_(integer *n, doublecomplex *zx, integer *incx) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val; + + /* Local variables */ + static integer i__, ix; + static doublereal stemp; + extern doublereal dcabs1_(doublecomplex *); + + +/* + takes the sum of the absolute values. + jack dongarra, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --zx; + + /* Function Body */ + ret_val = 0.; + stemp = 0.; + if (*n <= 0 || *incx <= 0) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += dcabs1_(&zx[ix]); + ix += *incx; +/* L10: */ + } + ret_val = stemp; + return ret_val; + +/* code for increment equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += dcabs1_(&zx[i__]); +/* L30: */ + } + ret_val = stemp; + return ret_val; +} /* dzasum_ */ + +doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal ret_val, d__1; + + /* Builtin functions */ + double d_imag(doublecomplex *), sqrt(doublereal); + + /* Local variables */ + static integer ix; + static doublereal ssq, temp, norm, scale; + + +/* + DZNRM2 returns the euclidean norm of a vector via the function + name, so that + + DZNRM2 := sqrt( conjg( x' )*x ) + + + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to ZLASSQ. + Sven Hammarling, Nag Ltd. 
+*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.; + } else { + scale = 0.; + ssq = 1.; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) +*/ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.) { + i__3 = ix; + temp = (d__1 = x[i__3].r, abs(d__1)); + if (scale < temp) { +/* Computing 2nd power */ + d__1 = scale / temp; + ssq = ssq * (d__1 * d__1) + 1.; + scale = temp; + } else { +/* Computing 2nd power */ + d__1 = temp / scale; + ssq += d__1 * d__1; + } + } + if (d_imag(&x[ix]) != 0.) { + temp = (d__1 = d_imag(&x[ix]), abs(d__1)); + if (scale < temp) { +/* Computing 2nd power */ + d__1 = scale / temp; + ssq = ssq * (d__1 * d__1) + 1.; + scale = temp; + } else { +/* Computing 2nd power */ + d__1 = temp / scale; + ssq += d__1 * d__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of DZNRM2. */ + +} /* dznrm2_ */ + +integer icamax_(integer *n, complex *cx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1, i__2; + real r__1, r__2; + + /* Builtin functions */ + double r_imag(complex *); + + /* Local variables */ + static integer i__, ix; + static real smax; + + +/* + finds the index of element having max. absolute value. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. 
+ modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2)); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = ix; + if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), dabs( + r__2)) <= smax) { + goto L5; + } + ret_val = i__; + i__2 = ix; + smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), + dabs(r__2)); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2)); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__; + if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), dabs( + r__2)) <= smax) { + goto L30; + } + ret_val = i__; + i__2 = i__; + smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), + dabs(r__2)); +L30: + ; + } + return ret_val; +} /* icamax_ */ + +integer idamax_(integer *n, doublereal *dx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + doublereal d__1; + + /* Local variables */ + static integer i__, ix; + static doublereal dmax__; + + +/* + finds the index of element having max. absolute value. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. 
+ modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --dx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + dmax__ = abs(dx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((d__1 = dx[ix], abs(d__1)) <= dmax__) { + goto L5; + } + ret_val = i__; + dmax__ = (d__1 = dx[ix], abs(d__1)); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + dmax__ = abs(dx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((d__1 = dx[i__], abs(d__1)) <= dmax__) { + goto L30; + } + ret_val = i__; + dmax__ = (d__1 = dx[i__], abs(d__1)); +L30: + ; + } + return ret_val; +} /* idamax_ */ + +integer isamax_(integer *n, real *sx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + real r__1; + + /* Local variables */ + static integer i__, ix; + static real smax; + + +/* + finds the index of element having max. absolute value. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. 
+ modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + smax = dabs(sx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((r__1 = sx[ix], dabs(r__1)) <= smax) { + goto L5; + } + ret_val = i__; + smax = (r__1 = sx[ix], dabs(r__1)); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + smax = dabs(sx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if ((r__1 = sx[i__], dabs(r__1)) <= smax) { + goto L30; + } + ret_val = i__; + smax = (r__1 = sx[i__], dabs(r__1)); +L30: + ; + } + return ret_val; +} /* isamax_ */ + +integer izamax_(integer *n, doublecomplex *zx, integer *incx) +{ + /* System generated locals */ + integer ret_val, i__1; + + /* Local variables */ + static integer i__, ix; + static doublereal smax; + extern doublereal dcabs1_(doublecomplex *); + + +/* + finds the index of element having max. absolute value. + jack dongarra, 1/15/85. + modified 3/93 to return if incx .le. 0. 
+ modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --zx; + + /* Function Body */ + ret_val = 0; + if (*n < 1 || *incx <= 0) { + return ret_val; + } + ret_val = 1; + if (*n == 1) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + ix = 1; + smax = dcabs1_(&zx[1]); + ix += *incx; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[ix]) <= smax) { + goto L5; + } + ret_val = i__; + smax = dcabs1_(&zx[ix]); +L5: + ix += *incx; +/* L10: */ + } + return ret_val; + +/* code for increment equal to 1 */ + +L20: + smax = dcabs1_(&zx[1]); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + if (dcabs1_(&zx[i__]) <= smax) { + goto L30; + } + ret_val = i__; + smax = dcabs1_(&zx[i__]); +L30: + ; + } + return ret_val; +} /* izamax_ */ + +logical lsame_(char *ca, char *cb) +{ + /* System generated locals */ + logical ret_val; + + /* Local variables */ + static integer inta, intb, zcode; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + LSAME returns .TRUE. if CA is the same letter as CB regardless of + case. + + Arguments + ========= + + CA (input) CHARACTER*1 + CB (input) CHARACTER*1 + CA and CB specify the single characters to be compared. + + ===================================================================== + + + Test if the characters are equal +*/ + + ret_val = *(unsigned char *)ca == *(unsigned char *)cb; + if (ret_val) { + return ret_val; + } + +/* Now test for equivalence if both characters are alphabetic. */ + + zcode = 'Z'; + +/* + Use 'Z' rather than 'A' so that ASCII can be detected on Prime + machines, on which ICHAR returns a value with bit 8 set. + ICHAR('A') on Prime machines returns 193 which is the same as + ICHAR('A') on an EBCDIC machine. 
+*/ + + inta = *(unsigned char *)ca; + intb = *(unsigned char *)cb; + + if (zcode == 90 || zcode == 122) { + +/* + ASCII is assumed - ZCODE is the ASCII code of either lower or + upper case 'Z'. +*/ + + if (inta >= 97 && inta <= 122) { + inta += -32; + } + if (intb >= 97 && intb <= 122) { + intb += -32; + } + + } else if (zcode == 233 || zcode == 169) { + +/* + EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or + upper case 'Z'. +*/ + + if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta + >= 162 && inta <= 169) { + inta += 64; + } + if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb + >= 162 && intb <= 169) { + intb += 64; + } + + } else if (zcode == 218 || zcode == 250) { + +/* + ASCII is assumed, on Prime machines - ZCODE is the ASCII code + plus 128 of either lower or upper case 'Z'. +*/ + + if (inta >= 225 && inta <= 250) { + inta += -32; + } + if (intb >= 225 && intb <= 250) { + intb += -32; + } + } + ret_val = inta == intb; + +/* + RETURN + + End of LSAME +*/ + + return ret_val; +} /* lsame_ */ + +/* Subroutine */ int saxpy_(integer *n, real *sa, real *sx, integer *incx, + real *sy, integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + + +/* + constant times a vector plus a vector. + uses unrolled loop for increments equal to one. + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*sa == 0.f) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + sy[iy] += *sa * sx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 4; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + sy[i__] += *sa * sx[i__]; +/* L30: */ + } + if (*n < 4) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 4) { + sy[i__] += *sa * sx[i__]; + sy[i__ + 1] += *sa * sx[i__ + 1]; + sy[i__ + 2] += *sa * sx[i__ + 2]; + sy[i__ + 3] += *sa * sx[i__ + 3]; +/* L50: */ + } + return 0; +} /* saxpy_ */ + +doublereal scasum_(integer *n, complex *cx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + real ret_val, r__1, r__2; + + /* Builtin functions */ + double r_imag(complex *); + + /* Local variables */ + static integer i__, nincx; + static real stemp; + + +/* + takes the sum of the absolute values of a complex vector and + returns a single precision result. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --cx; + + /* Function Body */ + ret_val = 0.f; + stemp = 0.f; + if (*n <= 0 || *incx <= 0) { + return ret_val; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = i__; + stemp = stemp + (r__1 = cx[i__3].r, dabs(r__1)) + (r__2 = r_imag(&cx[ + i__]), dabs(r__2)); +/* L10: */ + } + ret_val = stemp; + return ret_val; + +/* code for increment equal to 1 */ + +L20: + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__1 = i__; + stemp = stemp + (r__1 = cx[i__1].r, dabs(r__1)) + (r__2 = r_imag(&cx[ + i__]), dabs(r__2)); +/* L30: */ + } + ret_val = stemp; + return ret_val; +} /* scasum_ */ + +doublereal scnrm2_(integer *n, complex *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + real ret_val, r__1; + + /* Builtin functions */ + double r_imag(complex *), sqrt(doublereal); + + /* Local variables */ + static integer ix; + static real ssq, temp, norm, scale; + + +/* + SCNRM2 returns the euclidean norm of a vector via the function + name, so that + + SCNRM2 := sqrt( conjg( x' )*x ) + + + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to CLASSQ. + Sven Hammarling, Nag Ltd. +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.f; + } else { + scale = 0.f; + ssq = 1.f; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL CLASSQ( N, X, INCX, SCALE, SSQ ) +*/ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? 
ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.f) { + i__3 = ix; + temp = (r__1 = x[i__3].r, dabs(r__1)); + if (scale < temp) { +/* Computing 2nd power */ + r__1 = scale / temp; + ssq = ssq * (r__1 * r__1) + 1.f; + scale = temp; + } else { +/* Computing 2nd power */ + r__1 = temp / scale; + ssq += r__1 * r__1; + } + } + if (r_imag(&x[ix]) != 0.f) { + temp = (r__1 = r_imag(&x[ix]), dabs(r__1)); + if (scale < temp) { +/* Computing 2nd power */ + r__1 = scale / temp; + ssq = ssq * (r__1 * r__1) + 1.f; + scale = temp; + } else { +/* Computing 2nd power */ + r__1 = temp / scale; + ssq += r__1 * r__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of SCNRM2. */ + +} /* scnrm2_ */ + +/* Subroutine */ int scopy_(integer *n, real *sx, integer *incx, real *sy, + integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + + +/* + copies a vector, x, to a vector, y. + uses unrolled loops for increments equal to 1. + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + sy[iy] = sx[ix]; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 7; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + sy[i__] = sx[i__]; +/* L30: */ + } + if (*n < 7) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 7) { + sy[i__] = sx[i__]; + sy[i__ + 1] = sx[i__ + 1]; + sy[i__ + 2] = sx[i__ + 2]; + sy[i__ + 3] = sx[i__ + 3]; + sy[i__ + 4] = sx[i__ + 4]; + sy[i__ + 5] = sx[i__ + 5]; + sy[i__ + 6] = sx[i__ + 6]; +/* L50: */ + } + return 0; +} /* scopy_ */ + +doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) +{ + /* System generated locals */ + integer i__1; + real ret_val; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static real stemp; + + +/* + forms the dot product of two vectors. + uses unrolled loops for increments equal to one. + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + stemp = 0.f; + ret_val = 0.f; + if (*n <= 0) { + return ret_val; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += sx[ix] * sy[iy]; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val = stemp; + return ret_val; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += sx[i__] * sy[i__]; +/* L30: */ + } + if (*n < 5) { + goto L60; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 5) { + stemp = stemp + sx[i__] * sy[i__] + sx[i__ + 1] * sy[i__ + 1] + sx[ + i__ + 2] * sy[i__ + 2] + sx[i__ + 3] * sy[i__ + 3] + sx[i__ + + 4] * sy[i__ + 4]; +/* L50: */ + } +L60: + ret_val = stemp; + return ret_val; +} /* sdot_ */ + +/* Subroutine */ int sgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * + ldb, real *beta, real *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static logical nota, notb; + static real temp; + static integer ncola; + extern logical lsame_(char *, char *); + static integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SGEMM performs one of the matrix-matrix operations + + C := alpha*op( A )*op( B ) + beta*C, + + where op( X ) is one of + + op( X ) = X or op( X ) = X', + + alpha and beta are scalars, and A, B and C 
are matrices, with op( A ) + an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + Parameters + ========== + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n', op( A ) = A. + + TRANSA = 'T' or 't', op( A ) = A'. + + TRANSA = 'C' or 'c', op( A ) = A'. + + Unchanged on exit. + + TRANSB - CHARACTER*1. + On entry, TRANSB specifies the form of op( B ) to be used in + the matrix multiplication as follows: + + TRANSB = 'N' or 'n', op( B ) = B. + + TRANSB = 'T' or 't', op( B ) = B'. + + TRANSB = 'C' or 'c', op( B ) = B'. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix + op( A ) and of the matrix C. M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix + op( B ) and the number of columns of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry, K specifies the number of columns of the matrix + op( A ) and the number of rows of the matrix op( B ). K must + be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + k when TRANSA = 'N' or 'n', and is m otherwise. + Before entry with TRANSA = 'N' or 'n', the leading m by k + part of the array A must contain the matrix A, otherwise + the leading k by m part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANSA = 'N' or 'n' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, k ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, kb ), where kb is + n when TRANSB = 'N' or 'n', and is k otherwise. 
+ Before entry with TRANSB = 'N' or 'n', the leading k by n + part of the array B must contain the matrix B, otherwise + the leading n by k part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANSB = 'N' or 'n' then + LDB must be at least max( 1, k ), otherwise LDB must be at + least max( 1, n ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n matrix + ( alpha*op( A )*op( B ) + beta*C ). + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Set NOTA and NOTB as true if A and B respectively are not + transposed and set NROWA, NCOLA and NROWB as the number of rows + and columns of A and the number of rows of B respectively. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, "N"); + notb = lsame_(transb, "N"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + +/* Test the input parameters. */ + + info = 0; + if (! 
nota && ! lsame_(transa, "C") && ! lsame_( + transa, "T")) { + info = 1; + } else if (! notb && ! lsame_(transb, "C") && ! + lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_("SGEMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { + return 0; + } + +/* And if alpha.eq.zero. */ + + if (*alpha == 0.f) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L50: */ + } + } else if (*beta != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[l + j * b_dim1] != 0.f) { + temp = *alpha * b[l + j * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; +/* L100: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L110: */ + } +/* L120: */ + } + } + } else { + if (nota) { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L130: */ + } + } else if (*beta != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L140: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[j + l * b_dim1] != 0.f) { + temp = *alpha * b[j + l * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L150: */ + } + } +/* L160: */ + } +/* L170: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; 
+/* L180: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L190: */ + } +/* L200: */ + } + } + } + + return 0; + +/* End of SGEMM . */ + +} /* sgemm_ */ + +/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, + real *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static real temp; + static integer lenx, leny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SGEMV performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + Parameters + ========== + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' y := alpha*A*x + beta*y. + + TRANS = 'T' or 't' y := alpha*A'*x + beta*y. + + TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. 
+ + X - REAL array of DIMENSION at least + ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + Before entry, the incremented array X must contain the + vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - REAL array of DIMENSION at least + ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. + Before entry with BETA non-zero, the incremented array Y + must contain the vector y. On exit, Y is overwritten by the + updated vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("SGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) { + return 0; + } + +/* + Set LENX and LENY, the lengths of the vectors x and y, and set + up the start points in X and Y. 
+*/ + + if (lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. + + First form y := beta*y. +*/ + + if (*beta != 1.f) { + if (*incy == 1) { + if (*beta == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.f) { + return 0; + } + if (lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = *alpha * x[jx]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = *alpha * x[jx]; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp * a[i__ + j * a_dim1]; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y. 
*/ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.f; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L100: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.f; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of SGEMV . */ + +} /* sgemv_ */ + +/* Subroutine */ int sger_(integer *m, integer *n, real *alpha, real *x, + integer *incx, real *y, integer *incy, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, jy, kx, info; + static real temp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SGER performs the rank 1 operation + + A := alpha*x*y' + A, + + where alpha is a scalar, x is an m element vector, y is an n element + vector and A is an m by n matrix. + + Parameters + ========== + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - REAL array of dimension at least + ( 1 + ( m - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the m + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. 
+ + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. On exit, A is + overwritten by the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (*m < 0) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("SGER ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || *alpha == 0.f) { + return 0; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. 
+*/ + + if (*incy > 0) { + jy = 1; + } else { + jy = 1 - (*n - 1) * *incy; + } + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.f) { + temp = *alpha * y[jy]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[i__] * temp; +/* L10: */ + } + } + jy += *incy; +/* L20: */ + } + } else { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*m - 1) * *incx; + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (y[jy] != 0.f) { + temp = *alpha * y[jy]; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] += x[ix] * temp; + ix += *incx; +/* L30: */ + } + } + jy += *incy; +/* L40: */ + } + } + + return 0; + +/* End of SGER . */ + +} /* sger_ */ + +doublereal snrm2_(integer *n, real *x, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + real ret_val, r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer ix; + static real ssq, norm, scale, absxi; + + +/* + SNRM2 returns the euclidean norm of a vector via the function + name, so that + + SNRM2 := sqrt( x'*x ) + + + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to SLASSQ. + Sven Hammarling, Nag Ltd. +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n < 1 || *incx < 1) { + norm = 0.f; + } else if (*n == 1) { + norm = dabs(x[1]); + } else { + scale = 0.f; + ssq = 1.f; +/* + The following loop is equivalent to this call to the LAPACK + auxiliary routine: + CALL SLASSQ( N, X, INCX, SCALE, SSQ ) +*/ + + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? 
ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.f) { + absxi = (r__1 = x[ix], dabs(r__1)); + if (scale < absxi) { +/* Computing 2nd power */ + r__1 = scale / absxi; + ssq = ssq * (r__1 * r__1) + 1.f; + scale = absxi; + } else { +/* Computing 2nd power */ + r__1 = absxi / scale; + ssq += r__1 * r__1; + } + } +/* L10: */ + } + norm = scale * sqrt(ssq); + } + + ret_val = norm; + return ret_val; + +/* End of SNRM2. */ + +} /* snrm2_ */ + +/* Subroutine */ int srot_(integer *n, real *sx, integer *incx, real *sy, + integer *incy, real *c__, real *s) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, ix, iy; + static real stemp; + + +/* + applies a plane rotation. + jack dongarra, linpack, 3/11/78. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp = *c__ * sx[ix] + *s * sy[iy]; + sy[iy] = *c__ * sy[iy] - *s * sx[ix]; + sx[ix] = stemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp = *c__ * sx[i__] + *s * sy[i__]; + sy[i__] = *c__ * sy[i__] - *s * sx[i__]; + sx[i__] = stemp; +/* L30: */ + } + return 0; +} /* srot_ */ + +/* Subroutine */ int sscal_(integer *n, real *sa, real *sx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, m, mp1, nincx; + + +/* + scales a vector by a constant. + uses unrolled loops for increment equal to 1. + jack dongarra, linpack, 3/11/78. 
+ modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + sx[i__] = *sa * sx[i__]; +/* L10: */ + } + return 0; + +/* + code for increment equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__2 = m; + for (i__ = 1; i__ <= i__2; ++i__) { + sx[i__] = *sa * sx[i__]; +/* L30: */ + } + if (*n < 5) { + return 0; + } +L40: + mp1 = m + 1; + i__2 = *n; + for (i__ = mp1; i__ <= i__2; i__ += 5) { + sx[i__] = *sa * sx[i__]; + sx[i__ + 1] = *sa * sx[i__ + 1]; + sx[i__ + 2] = *sa * sx[i__ + 2]; + sx[i__ + 3] = *sa * sx[i__ + 3]; + sx[i__ + 4] = *sa * sx[i__ + 4]; +/* L50: */ + } + return 0; +} /* sscal_ */ + +/* Subroutine */ int sswap_(integer *n, real *sx, integer *incx, real *sy, + integer *incy) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static real stemp; + + +/* + interchanges two vectors. + uses unrolled loops for increments equal to 1. + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp = sx[ix]; + sx[ix] = sy[iy]; + sy[iy] = stemp; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 3; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp = sx[i__]; + sx[i__] = sy[i__]; + sy[i__] = stemp; +/* L30: */ + } + if (*n < 3) { + return 0; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 3) { + stemp = sx[i__]; + sx[i__] = sy[i__]; + sy[i__] = stemp; + stemp = sx[i__ + 1]; + sx[i__ + 1] = sy[i__ + 1]; + sy[i__ + 1] = stemp; + stemp = sx[i__ + 2]; + sx[i__ + 2] = sy[i__ + 2]; + sy[i__ + 2] = stemp; +/* L50: */ + } + return 0; +} /* sswap_ */ + +/* Subroutine */ int ssymv_(char *uplo, integer *n, real *alpha, real *a, + integer *lda, real *x, integer *incx, real *beta, real *y, integer * + incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static real temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYMV performs the matrix-vector operation + + y := alpha*A*x + beta*y, + + where alpha and beta are scalars, x and y are n element vectors and + A is an n by n symmetric matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. 
+ On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. On exit, Y is overwritten by the updated + vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. 
+ + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*lda < max(1,*n)) { + info = 5; + } else if (*incx == 0) { + info = 7; + } else if (*incy == 0) { + info = 10; + } + if (info != 0) { + xerbla_("SSYMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.f && *beta == 1.f) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. + + First form y := beta*y. +*/ + + if (*beta != 1.f) { + if (*incy == 1) { + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.f) { + return 0; + } + if (lsame_(uplo, "U")) { + +/* Form y when A is stored in upper triangle. 
*/ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L50: */ + } + y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + ix = kx; + iy = ky; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2; + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } else { + +/* Form y when A is stored in lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + y[j] += temp1 * a[j + j * a_dim1]; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[j] += *alpha * temp2; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + y[jy] += temp1 * a[j + j * a_dim1]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * a[i__ + j * a_dim1]; + temp2 += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of SSYMV . 
*/ + +} /* ssymv_ */ + +/* Subroutine */ int ssyr2_(char *uplo, integer *n, real *alpha, real *x, + integer *incx, real *y, integer *incy, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static real temp1, temp2; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYR2 performs the symmetric rank 2 operation + + A := alpha*x*y' + alpha*y*x' + A, + + where alpha is a scalar, x and y are n element vectors and A is an n + by n symmetric matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array A is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of A + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of A + is to be referenced. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + X - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + Y - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCY ) ). + Before entry, the incremented array Y must contain the n + element vector y. + Unchanged on exit. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). 
+ Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of A is not referenced. On exit, the + upper triangular part of the array A is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of A is not referenced. On exit, the + lower triangular part of the array A is overwritten by the + lower triangular part of the updated matrix. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --x; + --y; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 5; + } else if (*incy == 0) { + info = 7; + } else if (*lda < max(1,*n)) { + info = 9; + } + if (info != 0) { + xerbla_("SSYR2 ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || *alpha == 0.f) { + return 0; + } + +/* + Set up the start points in X and Y if the increments are not both + unity. +*/ + + if (*incx != 1 || *incy != 1) { + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + jx = kx; + jy = ky; + } + +/* + Start the operations. 
In this version the elements of A are + accessed sequentially with one pass through the triangular part + of A. +*/ + + if (lsame_(uplo, "U")) { + +/* Form A when A is stored in the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.f || y[j] != 0.f) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L10: */ + } + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f || y[jy] != 0.f) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = kx; + iy = ky; + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L30: */ + } + } + jx += *incx; + jy += *incy; +/* L40: */ + } + } + } else { + +/* Form A when A is stored in the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.f || y[j] != 0.f) { + temp1 = *alpha * y[j]; + temp2 = *alpha * x[j]; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * + temp1 + y[i__] * temp2; +/* L50: */ + } + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f || y[jy] != 0.f) { + temp1 = *alpha * y[jy]; + temp2 = *alpha * x[jx]; + ix = jx; + iy = jy; + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * + temp1 + y[iy] * temp2; + ix += *incx; + iy += *incy; +/* L70: */ + } + } + jx += *incx; + jy += *incy; +/* L80: */ + } + } + } + + return 0; + +/* End of SSYR2 . 
*/ + +} /* ssyr2_ */ + +/* Subroutine */ int ssyr2k_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, + real *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static real temp1, temp2; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYR2K performs one of the symmetric rank 2k operations + + C := alpha*A*B' + alpha*B*A' + beta*C, + + or + + C := alpha*A'*B + alpha*B'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A and B are n by k matrices in the first case and k by n + matrices in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*B' + alpha*B*A' + + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*B + alpha*B'*A + + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*B + alpha*B'*A + + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrices A and B, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrices A and B. K must be at least zero. + Unchanged on exit. + + ALPHA - REAL . 
+ On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, kb ), where kb is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array B must contain the matrix B, otherwise + the leading k by n part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDB must be at least max( 1, n ), otherwise LDB must + be at least max( 1, k ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. 
On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,nrowa)) { + info = 9; + } else if (*ldc < max(1,*n)) { + info = 12; + } + if (info != 0) { + xerbla_("SSYR2K", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { + return 0; + } + +/* And when alpha.eq.zero. 
*/ + + if (*alpha == 0.f) { + if (upper) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*B' + alpha*B*A' + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L90: */ + } + } else if (*beta != 1.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f || b[j + l * b_dim1] != 0.f) + { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L140: */ + } + } else if (*beta != 1.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; 
+/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f || b[j + l * b_dim1] != 0.f) + { + temp1 = *alpha * b[j + l * b_dim1]; + temp2 = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[ + i__ + l * a_dim1] * temp1 + b[i__ + l * + b_dim1] * temp2; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*B + alpha*B'*A + C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1 = 0.f; + temp2 = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp1 = 0.f; + temp2 = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1]; + temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * + temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + *alpha * temp1 + *alpha * temp2; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of SSYR2K. 
*/ + +} /* ssyr2k_ */ + +/* Subroutine */ int ssyrk_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, real *a, integer *lda, real *beta, real *c__, integer * + ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, info; + static real temp; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYRK performs one of the symmetric rank k operations + + C := alpha*A*A' + beta*C, + + or + + C := alpha*A'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A is an n by k matrix in the first case and a k by n matrix + in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*A + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrix A, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrix A. K must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. 
+ + A - REAL array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + xerbla_("SSYRK ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + if (upper) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*A' + beta*C. 
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L90: */ + } + } else if (*beta != 1.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L140: */ + } + } else if (*beta != 1.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*A + beta*C. 
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of SSYRK . */ + +} /* ssyrk_ */ + +/* Subroutine */ int strmm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, + integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static real temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + STRMM performs one of the matrix-matrix operations + + B := alpha*op( A )*B, or B := alpha*B*op( A ), + + where alpha is a scalar, B is an m by n matrix, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) multiplies B from + the left or right as follows: + + SIDE = 'L' or 'l' B := alpha*op( A )*B. + + SIDE = 'R' or 'r' B := alpha*B*op( A ). 
+ + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. 
When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the matrix B, and on exit is overwritten by the + transformed matrix. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("STRMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. 
*/ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*A*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.f) { + temp = *alpha * b[k + j * b_dim1]; + i__3 = k - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L30: */ + } + if (nounit) { + temp *= a[k + k * a_dim1]; + } + b[k + j * b_dim1] = temp; + } +/* L40: */ + } +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.f) { + temp = *alpha * b[k + j * b_dim1]; + b[k + j * b_dim1] = temp; + if (nounit) { + b[k + j * b_dim1] *= a[k + k * a_dim1]; + } + i__2 = *m; + for (i__ = k + 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * a[i__ + k * + a_dim1]; +/* L60: */ + } + } +/* L70: */ + } +/* L80: */ + } + } + } else { + +/* Form B := alpha*A'*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__2 = i__ - 1; + for (k = 1; k <= i__2; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L90: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L100: */ + } +/* L110: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = b[i__ + j * b_dim1]; + if (nounit) { + temp *= a[i__ + i__ * a_dim1]; + } + i__3 = *m; + for (k = i__ + 1; k <= i__3; ++k) { + temp += a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L120: */ + } + b[i__ + j * b_dim1] = *alpha * temp; +/* L130: */ + } +/* L140: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*A. 
*/ + + if (upper) { + for (j = *n; j >= 1; --j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L150: */ + } + i__1 = j - 1; + for (k = 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.f) { + temp = *alpha * a[k + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = *alpha; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L190: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.f) { + temp = *alpha * a[k + j * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L200: */ + } + } +/* L210: */ + } +/* L220: */ + } + } + } else { + +/* Form B := alpha*B*A'. 
*/ + + if (upper) { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + i__2 = k - 1; + for (j = 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = *alpha * a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } else { + for (k = *n; k >= 1; --k) { + i__1 = *n; + for (j = k + 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = *alpha * a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] += temp * b[i__ + k * + b_dim1]; +/* L270: */ + } + } +/* L280: */ + } + temp = *alpha; + if (nounit) { + temp *= a[k + k * a_dim1]; + } + if (temp != 1.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L290: */ + } + } +/* L300: */ + } + } + } + } + + return 0; + +/* End of STRMM . */ + +} /* strmm_ */ + +/* Subroutine */ int strmv_(char *uplo, char *trans, char *diag, integer *n, + real *a, integer *lda, real *x, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, jx, kx, info; + static real temp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + STRMV performs one of the matrix-vector operations + + x := A*x, or x := A'*x, + + where x is an n element vector and A is an n by n unit, or non-unit, + upper or lower triangular matrix. + + Parameters + ========== + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. 
+ + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' x := A*x. + + TRANS = 'T' or 't' x := A'*x. + + TRANS = 'C' or 'c' x := A'*x. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit + triangular as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix A. + N must be at least zero. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, n ). + Unchanged on exit. + + X - REAL array of dimension at least + ( 1 + ( n - 1 )*abs( INCX ) ). + Before entry, the incremented array X must contain the n + element vector x. On exit, X is overwritten with the + tranformed vector x. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. 
+ + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,*n)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } + if (info != 0) { + xerbla_("STRMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = lsame_(diag, "N"); + +/* + Set up the start point in X if the increment is not unity. This + will be ( N - 1 )*INCX too small for descending loops. +*/ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. +*/ + + if (lsame_(trans, "N")) { + +/* Form x := A*x. 
*/ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.f) { + temp = x[j]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L10: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = x[jx]; + ix = kx; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix += *incx; +/* L30: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx += *incx; +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.f) { + temp = x[j]; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + if (nounit) { + x[j] *= a[j + j * a_dim1]; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.f) { + temp = x[jx]; + ix = kx; + i__1 = j + 1; + for (i__ = *n; i__ >= i__1; --i__) { + x[ix] += temp * a[i__ + j * a_dim1]; + ix -= *incx; +/* L70: */ + } + if (nounit) { + x[jx] *= a[j + j * a_dim1]; + } + } + jx -= *incx; +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x. 
*/ + + if (lsame_(uplo, "U")) { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + x[j] = temp; +/* L100: */ + } + } else { + jx = kx + (*n - 1) * *incx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + for (i__ = j - 1; i__ >= 1; --i__) { + ix -= *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + x[jx] = temp; + jx -= *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[j]; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + x[j] = temp; +/* L140: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = x[jx]; + ix = jx; + if (nounit) { + temp *= a[j + j * a_dim1]; + } + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + ix += *incx; + temp += a[i__ + j * a_dim1] * x[ix]; +/* L150: */ + } + x[jx] = temp; + jx += *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of STRMV . 
*/ + +} /* strmv_ */ + +/* Subroutine */ int strsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, + integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static real temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + STRSM solves one of the matrix equations + + op( A )*X = alpha*B, or X*op( A ) = alpha*B, + + where alpha is a scalar, X and B are m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. + + The matrix X is overwritten on B. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) appears on the left + or right of X as follows: + + SIDE = 'L' or 'l' op( A )*X = alpha*B. + + SIDE = 'R' or 'r' X*op( A ) = alpha*B. + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. 
M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. + Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the right-hand side matrix B, and on exit is + overwritten by the solution matrix X. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("STRSM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. 
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.f) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.f) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form B := alpha*inv( A' )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L110: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L140: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). 
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L170: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.f) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L180: */ + } + } +/* L190: */ + } + if (nounit) { + temp = 1.f / a[j + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L200: */ + } + } +/* L210: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (*alpha != 1.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L220: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + if (nounit) { + temp = 1.f / a[j + j * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } + } else { + +/* Form B := alpha*B*inv( A' ). 
*/ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + temp = 1.f / a[k + k * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L270: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L280: */ + } + } +/* L290: */ + } + if (*alpha != 1.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L300: */ + } + } +/* L310: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + temp = 1.f / a[k + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L320: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L330: */ + } + } +/* L340: */ + } + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L350: */ + } + } +/* L360: */ + } + } + } + } + + return 0; + +/* End of STRSM . 
*/ + +} /* strsm_ */ /* Subroutine */ int zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy) @@ -9889,7 +20359,7 @@ L20: /* L210: */ } if (nounit) { - z_div(&z__1, &c_b353, &a[j + j * a_dim1]); + z_div(&z__1, &c_b1071, &a[j + j * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { @@ -9940,7 +20410,7 @@ L20: /* L260: */ } if (nounit) { - z_div(&z__1, &c_b353, &a[j + j * a_dim1]); + z_div(&z__1, &c_b1071, &a[j + j * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { @@ -9967,11 +20437,11 @@ L20: for (k = *n; k >= 1; --k) { if (nounit) { if (noconj) { - z_div(&z__1, &c_b353, &a[k + k * a_dim1]); + z_div(&z__1, &c_b1071, &a[k + k * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; } else { d_cnjg(&z__2, &a[k + k * a_dim1]); - z_div(&z__1, &c_b353, &z__2); + z_div(&z__1, &c_b1071, &z__2); temp.r = z__1.r, temp.i = z__1.i; } i__1 = *m; @@ -10031,11 +20501,11 @@ L20: for (k = 1; k <= i__1; ++k) { if (nounit) { if (noconj) { - z_div(&z__1, &c_b353, &a[k + k * a_dim1]); + z_div(&z__1, &c_b1071, &a[k + k * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; } else { d_cnjg(&z__2, &a[k + k * a_dim1]); - z_div(&z__1, &c_b353, &z__2); + z_div(&z__1, &c_b1071, &z__2); temp.r = z__1.r, temp.i = z__1.i; } i__2 = *m; diff --git a/numpy/linalg/lapack_lite/dlapack_lite.c b/numpy/linalg/lapack_lite/dlapack_lite.c index 15d6457eb..be6e0c6d4 100644 --- a/numpy/linalg/lapack_lite/dlapack_lite.c +++ b/numpy/linalg/lapack_lite/dlapack_lite.c @@ -20,257 +20,147 @@ extern doublereal dlapy2_(doublereal *x, doublereal *y); /* Table of constant values */ -static integer c__9 = 9; -static integer c__0 = 0; -static doublereal c_b15 = 1.; static integer c__1 = 1; -static doublereal c_b29 = 0.; -static doublereal c_b94 = -.125; -static doublereal c_b151 = -1.; +static complex c_b55 = {0.f,0.f}; +static complex c_b56 = {1.f,0.f}; static integer c_n1 = -1; static integer c__3 = 3; 
static integer c__2 = 2; +static integer c__0 = 0; static integer c__8 = 8; static integer c__4 = 4; static integer c__65 = 65; -static integer c__6 = 6; +static real c_b871 = 1.f; static integer c__15 = 15; static logical c_false = FALSE_; +static real c_b1101 = 0.f; +static integer c__9 = 9; +static real c_b1150 = -1.f; +static real c_b1794 = .5f; +static doublereal c_b2453 = 1.; +static doublereal c_b2467 = 0.; +static doublereal c_b2532 = -.125; +static doublereal c_b2589 = -1.; +static integer c__6 = 6; static integer c__10 = 10; static integer c__11 = 11; -static doublereal c_b2804 = 2.; +static doublereal c_b5242 = 2.; static logical c_true = TRUE_; -static real c_b3825 = 0.f; -static real c_b3826 = 1.f; +static real c_b8920 = 2.f; -/* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal * - d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, - integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * - iwork, integer *info) +/* Subroutine */ int cgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, real *scale, integer *m, complex *v, integer *ldv, + integer *info) { /* System generated locals */ - integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double d_sign(doublereal *, doublereal *), log(doublereal); + integer v_dim1, v_offset, i__1; /* Local variables */ - static integer i__, j, k; - static doublereal p, r__; - static integer z__, ic, ii, kk; - static doublereal cs; - static integer is, iu; - static doublereal sn; - static integer nm1; - static doublereal eps; - static integer ivt, difl, difr, ierr, perm, mlvl, sqre; + static integer i__, k; + static real s; + static integer ii; extern logical lsame_(char *, char *); - extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer * - , doublereal *, integer *), 
dswap_(integer *, doublereal *, - integer *, doublereal *, integer *); - static integer poles, iuplo, nsize, start; - extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - integer *, integer *, doublereal *, integer *); - - extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *), dlascl_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, integer *, doublereal *, integer *, - integer *), dlasdq_(char *, integer *, integer *, integer - *, integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *), dlaset_(char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *); - static integer givcol; - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - static integer icompq; - static doublereal orgnrm; - static integer givnum, givptr, qstart, smlsiz, wstart, smlszp; + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + static logical leftv; + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *); + static logical rightv; /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 + September 30, 1994 Purpose ======= - DBDSDC computes the singular value decomposition (SVD) of a real - N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, - using a divide and conquer method, where S is a diagonal matrix - with non-negative diagonal elements (the singular values of B), and - U and VT are orthogonal matrices of left and right singular vectors, - respectively. DBDSDC can be used to compute all singular values, - and optionally, singular vectors or singular vectors in compact form. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See DLASD3 for details. - - The code currently call DLASDQ if singular values only are desired. - However, it can be slightly modified to compute singular values - using the divide and conquer method. + CGEBAK forms the right or left eigenvectors of a complex general + matrix by backward transformation on the computed eigenvectors of the + balanced matrix output by CGEBAL. Arguments ========= - UPLO (input) CHARACTER*1 - = 'U': B is upper bidiagonal. - = 'L': B is lower bidiagonal. + JOB (input) CHARACTER*1 + Specifies the type of backward transformation required: + = 'N', do nothing, return immediately; + = 'P', do backward transformation for permutation only; + = 'S', do backward transformation for scaling only; + = 'B', do backward transformations for both permutation and + scaling. + JOB must be the same as the argument JOB supplied to CGEBAL. 
- COMPQ (input) CHARACTER*1 - Specifies whether singular vectors are to be computed - as follows: - = 'N': Compute singular values only; - = 'P': Compute singular values and compute singular - vectors in compact form; - = 'I': Compute singular values and singular vectors. + SIDE (input) CHARACTER*1 + = 'R': V contains right eigenvectors; + = 'L': V contains left eigenvectors. N (input) INTEGER - The order of the matrix B. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the n diagonal elements of the bidiagonal matrix B. - On exit, if INFO=0, the singular values of B. - - E (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the elements of E contain the offdiagonal - elements of the bidiagonal matrix whose SVD is desired. - On exit, E has been destroyed. - - U (output) DOUBLE PRECISION array, dimension (LDU,N) - If COMPQ = 'I', then: - On exit, if INFO = 0, U contains the left singular vectors - of the bidiagonal matrix. - For other values of COMPQ, U is not referenced. - - LDU (input) INTEGER - The leading dimension of the array U. LDU >= 1. - If singular vectors are desired, then LDU >= max( 1, N ). - - VT (output) DOUBLE PRECISION array, dimension (LDVT,N) - If COMPQ = 'I', then: - On exit, if INFO = 0, VT' contains the right singular - vectors of the bidiagonal matrix. - For other values of COMPQ, VT is not referenced. + The number of rows of the matrix V. N >= 0. - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= 1. - If singular vectors are desired, then LDVT >= max( 1, N ). + ILO (input) INTEGER + IHI (input) INTEGER + The integers ILO and IHI determined by CGEBAL. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - Q (output) DOUBLE PRECISION array, dimension (LDQ) - If COMPQ = 'P', then: - On exit, if INFO = 0, Q and IQ contain the left - and right singular vectors in a compact form, - requiring O(N log N) space instead of 2*N**2. 
- In particular, Q contains all the DOUBLE PRECISION data in - LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) - words of memory, where SMLSIZ is returned by ILAENV and - is equal to the maximum size of the subproblems at the - bottom of the computation tree (usually about 25). - For other values of COMPQ, Q is not referenced. + SCALE (input) REAL array, dimension (N) + Details of the permutation and scaling factors, as returned + by CGEBAL. - IQ (output) INTEGER array, dimension (LDIQ) - If COMPQ = 'P', then: - On exit, if INFO = 0, Q and IQ contain the left - and right singular vectors in a compact form, - requiring O(N log N) space instead of 2*N**2. - In particular, IQ contains all INTEGER data in - LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) - words of memory, where SMLSIZ is returned by ILAENV and - is equal to the maximum size of the subproblems at the - bottom of the computation tree (usually about 25). - For other values of COMPQ, IQ is not referenced. + M (input) INTEGER + The number of columns of the matrix V. M >= 0. - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) - If COMPQ = 'N' then LWORK >= (4 * N). - If COMPQ = 'P' then LWORK >= (6 * N). - If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). + V (input/output) COMPLEX array, dimension (LDV,M) + On entry, the matrix of right or left eigenvectors to be + transformed, as returned by CHSEIN or CTREVC. + On exit, V is overwritten by the transformed eigenvectors. - IWORK (workspace) INTEGER array, dimension (8*N) + LDV (input) INTEGER + The leading dimension of the array V. LDV >= max(1,N). INFO (output) INTEGER - = 0: successful exit. + = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value. - The update process of divide and conquer failed. 
- - Further Details - =============== - - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA ===================================================================== - Test the input parameters. + Decode and Test the input parameters */ /* Parameter adjustments */ - --d__; - --e; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --q; - --iq; - --work; - --iwork; + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; /* Function Body */ - *info = 0; + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); - iuplo = 0; - if (lsame_(uplo, "U")) { - iuplo = 1; - } - if (lsame_(uplo, "L")) { - iuplo = 2; - } - if (lsame_(compq, "N")) { - icompq = 0; - } else if (lsame_(compq, "P")) { - icompq = 1; - } else if (lsame_(compq, "I")) { - icompq = 2; - } else { - icompq = -1; - } - if (iuplo == 0) { + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { *info = -1; - } else if (icompq < 0) { + } else if (! rightv && ! 
leftv) { *info = -2; } else if (*n < 0) { *info = -3; - } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { *info = -7; - } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { + } else if (*ldv < max(1,*n)) { *info = -9; } if (*info != 0) { i__1 = -(*info); - xerbla_("DBDSDC", &i__1); + xerbla_("CGEBAK", &i__1); return 0; } @@ -279,1485 +169,1764 @@ static real c_b3826 = 1.f; if (*n == 0) { return 0; } - smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - if (*n == 1) { - if (icompq == 1) { - q[1] = d_sign(&c_b15, &d__[1]); - q[smlsiz * *n + 1] = 1.; - } else if (icompq == 2) { - u[u_dim1 + 1] = d_sign(&c_b15, &d__[1]); - vt[vt_dim1 + 1] = 1.; - } - d__[1] = abs(d__[1]); + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { return 0; } - nm1 = *n - 1; - -/* - If matrix lower bidiagonal, rotate to be upper bidiagonal - by applying Givens rotations on the left -*/ - wstart = 1; - qstart = 3; - if (icompq == 1) { - dcopy_(n, &d__[1], &c__1, &q[1], &c__1); - i__1 = *n - 1; - dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); + if (*ilo == *ihi) { + goto L30; } - if (iuplo == 2) { - qstart = 5; - wstart = (*n << 1) - 1; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (icompq == 1) { - q[i__ + (*n << 1)] = cs; - q[i__ + *n * 3] = sn; - } else if (icompq == 2) { - work[i__] = cs; - work[nm1 + i__] = -sn; - } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + csscal_(m, &s, &v[i__ + v_dim1], ldv); /* L10: */ + } } - } -/* If ICOMPQ = 0, use DLASDQ to compute the singular values. 
*/ + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1.f / scale[i__]; + csscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } - if (icompq == 0) { - dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ - vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ - wstart], info); - goto L40; } /* - If N is smaller than the minimum divide size SMLSIZ, then solve - the problem with another solver. + Backward permutation + + For I = ILO-1 step -1 until 1, + IHI+1 step 1 until N do -- */ - if (*n <= smlsiz) { - if (icompq == 2) { - dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); - dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); - dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] - , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ - wstart], info); - } else if (icompq == 1) { - iu = 1; - ivt = iu + *n; - dlaset_("A", n, n, &c_b29, &c_b15, &q[iu + (qstart - 1) * *n], n); - dlaset_("A", n, n, &c_b29, &c_b15, &q[ivt + (qstart - 1) * *n], n); - dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( - qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ - iu + (qstart - 1) * *n], n, &work[wstart], info); +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L40; + } + cswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } } - goto L40; - } - if (icompq == 2) { - dlaset_("A", n, n, &c_b29, &c_b15, &u[u_offset], ldu); - dlaset_("A", n, n, &c_b29, &c_b15, &vt[vt_offset], ldvt); + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L50; + } + cswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + 
; + } + } } -/* Scale. */ + return 0; - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { - return 0; - } - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, &ierr); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, & - ierr); +/* End of CGEBAK */ - eps = EPSILON; +} /* cgebak_ */ - mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / - log(2.)) + 1; - smlszp = smlsiz + 1; +/* Subroutine */ int cgebal_(char *job, integer *n, complex *a, integer *lda, + integer *ilo, integer *ihi, real *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1, r__2; - if (icompq == 1) { - iu = 1; - ivt = smlsiz + 1; - difl = ivt + smlszp; - difr = difl + mlvl; - z__ = difr + (mlvl << 1); - ic = z__ + mlvl; - is = ic + 1; - poles = is + 1; - givnum = poles + (mlvl << 1); + /* Builtin functions */ + double r_imag(complex *), c_abs(complex *); - k = 1; - givptr = 2; - perm = 3; - givcol = perm + mlvl; - } + /* Local variables */ + static real c__, f, g; + static integer i__, j, k, l, m; + static real r__, s, ca, ra; + static integer ica, ira, iexc; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + static real sfmin1, sfmin2, sfmax1, sfmax2; + extern integer icamax_(integer *, complex *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *); + static logical noconv; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = d__[i__], abs(d__1)) < eps) { - d__[i__] = d_sign(&eps, &d__[i__]); - } -/* L20: */ - } - start = 1; - sqre = 0; +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 - i__1 = nm1; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { -/* - Subproblem found. First determine its size and then - apply divide and conquer on it. -*/ + Purpose + ======= - if (i__ < nm1) { + CGEBAL balances a general complex matrix A. This involves, first, + permuting A by a similarity transformation to isolate eigenvalues + in the first 1 to ILO-1 and last IHI+1 to N elements on the + diagonal; and second, applying a diagonal similarity transformation + to rows and columns ILO to IHI to make the rows and columns as + close in norm as possible. Both steps are optional. -/* A subproblem with E(I) small for I < NM1. */ + Balancing may reduce the 1-norm of the matrix, and improve the + accuracy of the computed eigenvalues and/or eigenvectors. - nsize = i__ - start + 1; - } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + Arguments + ========= -/* A subproblem with E(NM1) not too small but I = NM1. */ + JOB (input) CHARACTER*1 + Specifies the operations to be performed on A: + = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 + for i = 1,...,N; + = 'P': permute only; + = 'S': scale only; + = 'B': both permute and scale. - nsize = *n - start + 1; - } else { + N (input) INTEGER + The order of the matrix A. N >= 0. -/* - A subproblem with E(NM1) small. This implies an - 1-by-1 subproblem at D(N). Solve this 1-by-1 problem - first. -*/ + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the input matrix A. + On exit, A is overwritten by the balanced matrix. + If JOB = 'N', A is not referenced. + See Further Details. 
- nsize = i__ - start + 1; - if (icompq == 2) { - u[*n + *n * u_dim1] = d_sign(&c_b15, &d__[*n]); - vt[*n + *n * vt_dim1] = 1.; - } else if (icompq == 1) { - q[*n + (qstart - 1) * *n] = d_sign(&c_b15, &d__[*n]); - q[*n + (smlsiz + qstart - 1) * *n] = 1.; - } - d__[*n] = (d__1 = d__[*n], abs(d__1)); - } - if (icompq == 2) { - dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + - start * u_dim1], ldu, &vt[start + start * vt_dim1], - ldvt, &smlsiz, &iwork[1], &work[wstart], info); - } else { - dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ - start], &q[start + (iu + qstart - 2) * *n], n, &q[ - start + (ivt + qstart - 2) * *n], &iq[start + k * *n], - &q[start + (difl + qstart - 2) * *n], &q[start + ( - difr + qstart - 2) * *n], &q[start + (z__ + qstart - - 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ - start + givptr * *n], &iq[start + givcol * *n], n, & - iq[start + perm * *n], &q[start + (givnum + qstart - - 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ - start + (is + qstart - 2) * *n], &work[wstart], & - iwork[1], info); - if (*info != 0) { - return 0; - } - } - start = i__ + 1; - } -/* L30: */ - } + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). -/* Unscale */ + ILO (output) INTEGER + IHI (output) INTEGER + ILO and IHI are set to integers such that on exit + A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. + If JOB = 'N' or 'S', ILO = 1 and IHI = N. - dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, &ierr); -L40: + SCALE (output) REAL array, dimension (N) + Details of the permutations and scaling factors applied to + A. If P(j) is the index of the row and column interchanged + with row and column j and D(j) is the scaling factor + applied to row and column j, then + SCALE(j) = P(j) for j = 1,...,ILO-1 + = D(j) for j = ILO,...,IHI + = P(j) for j = IHI+1,...,N. + The order in which the interchanges are made is N to IHI+1, + then 1 to ILO-1. 
-/* Use Selection Sort to minimize swaps of singular vectors */ + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - kk = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] > p) { - kk = j; - p = d__[j]; - } -/* L50: */ - } - if (kk != i__) { - d__[kk] = d__[i__]; - d__[i__] = p; - if (icompq == 1) { - iq[i__] = kk; - } else if (icompq == 2) { - dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & - c__1); - dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); + Further Details + =============== + + The permutations consist of row and column interchanges which put + the matrix in the form + + ( T1 X Y ) + P A P = ( 0 B Z ) + ( 0 0 T2 ) + + where T1 and T2 are upper triangular matrices whose eigenvalues lie + along the diagonal. The column indices ILO and IHI mark the starting + and ending columns of the submatrix B. Balancing consists of applying + a diagonal similarity transformation inv(D) * B * D to make the + 1-norms of each row of B and its corresponding column nearly equal. + The output matrix is + + ( T1 X*D Y ) + ( 0 inv(D)*B*D inv(D)*Z ). + ( 0 0 T2 ) + + Information about the permutations P and the diagonal matrix D is + returned in the vector SCALE. + + This subroutine is based on the EISPACK routine CBAL. + + Modified by Tzu-Yi Chen, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! 
lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEBAL", &i__1); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L10: */ + } + goto L210; + } + + if (lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. */ + +L20: + scale[m] = (real) j; + if (j == m) { + goto L30; + } + + cswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + cswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. */ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; } - } else if (icompq == 1) { - iq[i__] = i__; + i__2 = j + i__ * a_dim1; + if (a[i__2].r != 0.f || r_imag(&a[j + i__ * a_dim1]) != 0.f) { + goto L70; + } +L60: + ; } -/* L60: */ + + m = l; + iexc = 1; + goto L20; +L70: + ; } -/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ + goto L90; - if (icompq == 1) { - if (iuplo == 1) { - iq[*n] = 1; - } else { - iq[*n] = 0; +/* Search for columns isolating an eigenvalue and push them left. 
*/ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + i__3 = i__ + j * a_dim1; + if (a[i__3].r != 0.f || r_imag(&a[i__ + j * a_dim1]) != 0.f) { + goto L110; + } +L100: + ; } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L130: */ + } + + if (lsame_(job, "P")) { + goto L210; } /* - If B is lower bidiagonal, update U by those Givens rotations - which rotated B to be upper bidiagonal + Balance the submatrix in rows K to L. + + Iterative loop for norm reduction */ - if (iuplo == 2 && icompq == 2) { - dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); + sfmin1 = slamch_("S") / slamch_("P"); + sfmax1 = 1.f / sfmin1; + sfmin2 = sfmin1 * 8.f; + sfmax2 = 1.f / sfmin2; +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + c__ = 0.f; + r__ = 0.f; + + i__2 = l; + for (j = k; j <= i__2; ++j) { + if (j == i__) { + goto L150; + } + i__3 = j + i__ * a_dim1; + c__ += (r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(&a[j + i__ + * a_dim1]), dabs(r__2)); + i__3 = i__ + j * a_dim1; + r__ += (r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(&a[i__ + j + * a_dim1]), dabs(r__2)); +L150: + ; + } + ica = icamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = c_abs(&a[ica + i__ * a_dim1]); + i__2 = *n - k + 1; + ira = icamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = c_abs(&a[i__ + (ira + k - 1) * a_dim1]); + +/* Guard against zero C or R due to underflow. 
*/ + + if (c__ == 0.f || r__ == 0.f) { + goto L200; + } + g = r__ / 8.f; + f = 1.f; + s = c__ + r__; +L160: +/* Computing MAX */ + r__1 = max(f,c__); +/* Computing MIN */ + r__2 = min(r__,g); + if (c__ >= g || dmax(r__1,ca) >= sfmax2 || dmin(r__2,ra) <= sfmin2) { + goto L170; + } + f *= 8.f; + c__ *= 8.f; + ca *= 8.f; + r__ /= 8.f; + g /= 8.f; + ra /= 8.f; + goto L160; + +L170: + g = c__ / 8.f; +L180: +/* Computing MIN */ + r__1 = min(f,c__), r__1 = min(r__1,g); + if (g < r__ || dmax(r__,ra) >= sfmax2 || dmin(r__1,ca) <= sfmin2) { + goto L190; + } + f /= 8.f; + c__ /= 8.f; + g /= 8.f; + ca /= 8.f; + r__ *= 8.f; + ra *= 8.f; + goto L180; + +/* Now balance. */ + +L190: + if (c__ + r__ >= s * .95f) { + goto L200; + } + if (f < 1.f && scale[i__] < 1.f) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1.f && scale[i__] > 1.f) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1.f / f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + csscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + csscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; } +L210: + *ilo = k; + *ihi = l; + return 0; -/* End of DBDSDC */ +/* End of CGEBAL */ -} /* dbdsdc_ */ +} /* cgebal_ */ -/* Subroutine */ int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * - nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, - integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * - ldc, doublereal *work, integer *info) +/* Subroutine */ int cgebd2_(integer *m, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tauq, complex *taup, complex *work, + integer *info) { /* System generated locals */ - integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2; - doublereal d__1, d__2, d__3, d__4; + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + complex q__1; /* Builtin functions */ - double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign( - doublereal 
*, doublereal *); + void r_cnjg(complex *, complex *); /* Local variables */ - static doublereal f, g, h__; - static integer i__, j, m; - static doublereal r__, cs; - static integer ll; - static doublereal sn, mu; - static integer nm1, nm12, nm13, lll; - static doublereal eps, sll, tol, abse; - static integer idir; - static doublereal abss; - static integer oldm; - static doublereal cosl; - static integer isub, iter; - static doublereal unfl, sinl, cosr, smin, smax, sinr; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *), dlas2_( - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *), dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - static doublereal oldcs; - extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *); - static integer oldll; - static doublereal shift, sigmn, oldsn; - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer maxit; - static doublereal sminl, sigmx; - static logical lower; - extern /* Subroutine */ int dlasq1_(integer *, doublereal *, doublereal *, - doublereal *, integer *), dlasv2_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *); - - extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *), xerbla_(char *, - integer *); - static doublereal sminoa, thresh; - static logical rotate; - static doublereal sminlo, tolmul; + static integer i__; + static complex alpha; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + clarfg_(integer *, complex *, complex *, integer *, complex *), + clacgv_(integer *, complex *, integer *), xerbla_(char *, 
integer + *); /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + September 30, 1994 Purpose ======= - DBDSQR computes the singular value decomposition (SVD) of a real - N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' - denotes the transpose of P), where S is a diagonal matrix with - non-negative diagonal elements (the singular values of B), and Q - and P are orthogonal matrices. - - The routine computes S, and optionally computes U * Q, P' * VT, - or Q' * C, for given real input matrices U, VT, and C. + CGEBD2 reduces a complex general m by n matrix A to upper or lower + real bidiagonal form B by a unitary transformation: Q' * A * P = B. - See "Computing Small Singular Values of Bidiagonal Matrices With - Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, - LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, - no. 5, pp. 873-912, Sept 1990) and - "Accurate singular values and differential qd algorithms," by - B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics - Department, University of California at Berkeley, July 1992 - for a detailed description of the algorithm. + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. Arguments ========= - UPLO (input) CHARACTER*1 - = 'U': B is upper bidiagonal; - = 'L': B is lower bidiagonal. + M (input) INTEGER + The number of rows in the matrix A. M >= 0. N (input) INTEGER - The order of the matrix B. N >= 0. + The number of columns in the matrix A. N >= 0. - NCVT (input) INTEGER - The number of columns of the matrix VT. NCVT >= 0. + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. 
+ On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the unitary matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the unitary matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. - NRU (input) INTEGER - The number of rows of the matrix U. NRU >= 0. + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). - NCC (input) INTEGER - The number of columns of the matrix C. NCC >= 0. + D (output) REAL array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the n diagonal elements of the bidiagonal matrix B. - On exit, if INFO=0, the singular values of B in decreasing - order. + E (output) REAL array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - E (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the elements of E contain the - offdiagonal elements of the bidiagonal matrix whose SVD - is desired. On normal exit (INFO = 0), E is destroyed. - If the algorithm does not converge (INFO > 0), D and E - will contain the diagonal and superdiagonal elements of a - bidiagonal matrix orthogonally equivalent to the one given - as input. E(N) is used for workspace. 
+ TAUQ (output) COMPLEX array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. - VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) - On entry, an N-by-NCVT matrix VT. - On exit, VT is overwritten by P' * VT. - VT is not referenced if NCVT = 0. + TAUP (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. - LDVT (input) INTEGER - The leading dimension of the array VT. - LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. + WORK (workspace) COMPLEX array, dimension (max(M,N)) - U (input/output) DOUBLE PRECISION array, dimension (LDU, N) - On entry, an NRU-by-N matrix U. - On exit, U is overwritten by U * Q. - U is not referenced if NRU = 0. + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. - LDU (input) INTEGER - The leading dimension of the array U. LDU >= max(1,NRU). + Further Details + =============== - C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) - On entry, an N-by-NCC matrix C. - On exit, C is overwritten by Q' * C. - C is not referenced if NCC = 0. + The matrices Q and P are represented as products of elementary + reflectors: - LDC (input) INTEGER - The leading dimension of the array C. - LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. + If m >= n, - WORK (workspace) DOUBLE PRECISION array, dimension (4*N) + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) - INFO (output) INTEGER - = 0: successful exit - < 0: If INFO = -i, the i-th argument had an illegal value - > 0: the algorithm did not converge; D and E contain the - elements of a bidiagonal matrix which is orthogonally - similar to the input matrix B; if INFO = i, i - elements of E have not converged to zero. 
+ Each H(i) and G(i) has the form: - Internal Parameters - =================== + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) - TOLMUL controls the convergence criterion of the QR loop. - If it is positive, TOLMUL*EPS is the desired relative - precision in the computed singular values. - If it is negative, abs(TOLMUL*EPS*sigma_max) is the - desired absolute accuracy in the computed singular - values (corresponds to relative accuracy - abs(TOLMUL*EPS) in the largest singular value. - abs(TOLMUL) should be between 1 and 1/EPS, and preferably - between 10 (for fast convergence) and .1/EPS - (for there to be some accuracy in the results). - Default is to lose at either one eighth or 2 of the - available decimal digits in each computed singular value - (whichever is smaller). + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in + A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in + A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - MAXITR INTEGER, default = 6 - MAXITR controls the maximum number of passes of the - algorithm through its inner loop. The algorithms stops - (and so fails to converge) if the number of passes - through the inner loop exceeds MAXITR*N**2. + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, v and u are complex vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). 
+ + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). ===================================================================== - Test the input parameters. + Test the input parameters */ /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; --d__; --e; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; + --tauq; + --taup; --work; /* Function Body */ *info = 0; - lower = lsame_(uplo, "L"); - if (! lsame_(uplo, "U") && ! lower) { + if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; - } else if (*ncvt < 0) { - *info = -3; - } else if (*nru < 0) { + } else if (*lda < max(1,*m)) { *info = -4; - } else if (*ncc < 0) { - *info = -5; - } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { - *info = -9; - } else if (*ldu < max(1,*nru)) { - *info = -11; - } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { - *info = -13; } - if (*info != 0) { + if (*info < 0) { i__1 = -(*info); - xerbla_("DBDSQR", &i__1); - return 0; - } - if (*n == 0) { + xerbla_("CGEBD2", &i__1); return 0; } - if (*n == 1) { - goto L160; - } -/* ROTATE is true if any singular vectors desired, false otherwise */ - - rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + if (*m >= *n) { -/* If no singular vectors desired, use qd algorithm */ +/* Reduce to upper bidiagonal form */ - if (! 
rotate) { - dlasq1_(n, &d__[1], &e[1], &work[1], info); - return 0; - } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { - nm1 = *n - 1; - nm12 = nm1 + nm1; - nm13 = nm12 + nm1; - idir = 0; +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ -/* Get machine constants */ + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & + tauq[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; - eps = EPSILON; - unfl = SAFEMINIMUM; +/* Apply H(i)' to A(i:m,i+1:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + r_cnjg(&q__1, &tauq[i__]); + clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &q__1, + &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + i__2 = i__ + i__ * a_dim1; + i__3 = i__; + a[i__2].r = d__[i__3], a[i__2].i = 0.f; + + if (i__ < *n) { /* - If matrix lower bidiagonal, rotate to be upper bidiagonal - by applying Givens rotations on the left + Generate elementary reflector G(i) to annihilate + A(i,i+2:n) */ - if (lower) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - work[i__] = cs; - work[nm1 + i__] = sn; + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + (i__ + 1) * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + clarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * 
a_dim1], + lda, &work[1]); + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + (i__ + 1) * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.f; + } else { + i__2 = i__; + taup[i__2].r = 0.f, taup[i__2].i = 0.f; + } /* L10: */ } + } else { -/* Update singular vectors if desired */ +/* Reduce to lower bidiagonal form */ - if (*nru > 0) { - dlasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], - ldu); - } - if (*ncc > 0) { - dlasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], - ldc); - } - } + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { -/* - Compute singular values to relative accuracy TOL - (By setting TOL to be negative, algorithm will compute - singular values to absolute accuracy ABS(TOL)*norm(input matrix)) +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ - Computing MAX - Computing MIN + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; +/* Computing MIN */ + i__4 = i__ + 1; + clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ + i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__; + a[i__2].r = d__[i__3], a[i__2].i = 0.f; + + if (i__ < *m) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:m,i) */ - d__3 = 100., d__4 = pow_dd(&eps, &c_b94); - d__1 = 10., d__2 = min(d__3,d__4); - tolmul = max(d__1,d__2); - tol = tolmul * eps; -/* Compute approximate maximum, minimum singular values */ + i__2 = i__ + 1 + i__ 
* a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, + &tauq[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; - smax = 0.; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1)); - smax = max(d__2,d__3); +/* Apply H(i)' to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + r_cnjg(&q__1, &tauq[i__]); + clarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &q__1, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, & + work[1]); + i__2 = i__ + 1 + i__ * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.f; + } else { + i__2 = i__; + tauq[i__2].r = 0.f, tauq[i__2].i = 0.f; + } /* L20: */ + } } - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); - smax = max(d__2,d__3); -/* L30: */ - } - sminl = 0.; - if (tol >= 0.) { + return 0; -/* Relative accuracy desired */ +/* End of CGEBD2 */ - sminoa = abs(d__[1]); - if (sminoa == 0.) { - goto L50; - } - mu = sminoa; - i__1 = *n; - for (i__ = 2; i__ <= i__1; ++i__) { - mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] - , abs(d__1)))); - sminoa = min(sminoa,mu); - if (sminoa == 0.) 
{ - goto L50; - } -/* L40: */ - } -L50: - sminoa /= sqrt((doublereal) (*n)); -/* Computing MAX */ - d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl; - thresh = max(d__1,d__2); - } else { +} /* cgebd2_ */ -/* - Absolute accuracy desired +/* Subroutine */ int cgebrd_(integer *m, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tauq, complex *taup, complex *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + real r__1; + complex q__1; + + /* Local variables */ + static integer i__, j, nb, nx; + static real ws; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + static integer nbmin, iinfo, minmn; + extern /* Subroutine */ int cgebd2_(integer *, integer *, complex *, + integer *, real *, real *, complex *, complex *, complex *, + integer *), clabrd_(integer *, integer *, integer *, complex *, + integer *, real *, real *, complex *, complex *, complex *, + integer *, complex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwrkx, ldwrky, lwkopt; + static logical lquery; - Computing MAX -*/ - d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl; - thresh = max(d__1,d__2); - } /* - Prepare for main iteration loop for the singular values - (MAXIT is the maximum number of passes through the inner - loop permitted before nonconvergence signalled.) -*/ + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 - maxit = *n * 6 * *n; - iter = 0; - oldll = -1; - oldm = -1; -/* M points to last element of unconverged part of matrix */ + Purpose + ======= - m = *n; + CGEBRD reduces a general complex M-by-N matrix A to upper or lower + bidiagonal form B by a unitary transformation: Q**H * A * P = B. -/* Begin main iteration loop */ + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. -L60: + Arguments + ========= -/* Check for convergence or exceeding iteration count */ + M (input) INTEGER + The number of rows in the matrix A. M >= 0. - if (m <= 1) { - goto L160; - } - if (iter > maxit) { - goto L200; - } + N (input) INTEGER + The number of columns in the matrix A. N >= 0. -/* Find diagonal block of matrix to work on */ + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the unitary matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the unitary matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. - if (tol < 0. 
&& (d__1 = d__[m], abs(d__1)) <= thresh) { - d__[m] = 0.; - } - smax = (d__1 = d__[m], abs(d__1)); - smin = smax; - i__1 = m - 1; - for (lll = 1; lll <= i__1; ++lll) { - ll = m - lll; - abss = (d__1 = d__[ll], abs(d__1)); - abse = (d__1 = e[ll], abs(d__1)); - if (tol < 0. && abss <= thresh) { - d__[ll] = 0.; - } - if (abse <= thresh) { - goto L80; - } - smin = min(smin,abss); -/* Computing MAX */ - d__1 = max(smax,abss); - smax = max(d__1,abse); -/* L70: */ - } - ll = 0; - goto L90; -L80: - e[ll] = 0.; + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). -/* Matrix splits since E(LL) = 0 */ + D (output) REAL array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). - if (ll == m - 1) { + E (output) REAL array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. -/* Convergence of bottom singular value, return to top of loop */ + TAUQ (output) COMPLEX array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. - --m; - goto L60; - } -L90: - ++ll; + TAUP (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. -/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - if (ll == m - 1) { + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,M,N). + For optimum performance LWORK >= (M+N)*NB, where NB + is the optimal blocksize. 
-/* 2 by 2 block, handle separately */ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. - dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, - &sinl, &cosl); - d__[m - 1] = sigmx; - e[m - 1] = 0.; - d__[m] = sigmn; + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. -/* Compute singular vectors, if desired */ + Further Details + =============== - if (*ncvt > 0) { - drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & - cosr, &sinr); - } - if (*nru > 0) { - drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & - c__1, &cosl, &sinl); - } - if (*ncc > 0) { - drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & - cosl, &sinl); - } - m += -2; - goto L60; - } + The matrices Q and P are represented as products of elementary + reflectors: -/* - If working on new submatrix, choose shift direction - (from larger end diagonal element towards smaller) -*/ + If m >= n, - if (ll > oldm || m < oldll) { - if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) { + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) -/* Chase bulge from top (big end) to bottom (small end) */ + Each H(i) and G(i) has the form: - idir = 1; - } else { + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' -/* Chase bulge from bottom (big end) to top (small end) */ + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in + A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in + A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - idir = 2; - } - } + If m < n, -/* Apply convergence tests */ + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . 
G(m) - if (idir == 1) { + Each H(i) and G(i) has the form: -/* - Run convergence test in forward direction - First apply standard test to bottom of matrix -*/ + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs( - d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) - { - e[m - 1] = 0.; - goto L60; - } + where tauq and taup are complex scalars, and v and u are complex + vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). - if (tol >= 0.) { + The contents of A on exit are illustrated by the following examples: -/* - If relative accuracy desired, - apply convergence criterion forward + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). 
+ + ===================================================================== + + + Test the input parameters */ - mu = (d__1 = d__[ll], abs(d__1)); - sminl = mu; - i__1 = m - 1; - for (lll = ll; lll <= i__1; ++lll) { - if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { - e[lll] = 0.; - goto L60; - } - sminlo = sminl; - mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ - lll], abs(d__1)))); - sminl = min(sminl,mu); -/* L100: */ - } + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = max(i__1,i__2); + lwkopt = (*m + *n) * nb; + r__1 = (real) lwkopt; + work[1].r = r__1, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -10; } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("CGEBRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } - } else { +/* Quick return if possible */ + + minmn = min(*m,*n); + if (minmn == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + ws = (real) max(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { /* - Run convergence test in backward direction - First apply standard test to top of matrix + Set the crossover point NX. + + Computing MAX */ + i__1 = nb, i__2 = ilaenv_(&c__3, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); - if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1) - ) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) { - e[ll] = 0.; - goto L60; - } +/* Determine when to switch from blocked to unblocked code. */ - if (tol >= 0.) 
{ + if (nx < minmn) { + ws = (real) ((*m + *n) * nb); + if ((real) (*lwork) < ws) { /* - If relative accuracy desired, - apply convergence criterion backward + Not enough work space for the optimal NB, consider using + a smaller block size. */ - mu = (d__1 = d__[m], abs(d__1)); - sminl = mu; - i__1 = ll; - for (lll = m - 1; lll >= i__1; --lll) { - if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { - e[lll] = 0.; - goto L60; + nbmin = ilaenv_(&c__2, "CGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; } - sminlo = sminl; - mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] - , abs(d__1)))); - sminl = min(sminl,mu); -/* L110: */ } } + } else { + nx = minmn; } - oldll = ll; - oldm = m; -/* - Compute shift. First, test if shifting would ruin relative - accuracy, and if so set the shift to zero. + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { - Computing MAX +/* + Reduce rows and columns i:i+ib-1 to bidiagonal form and return + the matrices X and Y which are needed to update the unreduced + part of the matrix */ - d__1 = eps, d__2 = tol * .01; - if (tol >= 0. 
&& *n * tol * (sminl / smax) <= max(d__1,d__2)) { -/* Use a zero shift to avoid loss of relative accuracy */ - - shift = 0.; - } else { + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + clabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); -/* Compute the shift from 2-by-2 block at end of matrix */ +/* + Update the trailing submatrix A(i+ib:m,i+ib:n), using + an update of the form A := A - V*Y' - X*U' +*/ - if (idir == 1) { - sll = (d__1 = d__[ll], abs(d__1)); - dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); - } else { - sll = (d__1 = d__[m], abs(d__1)); - dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); - } + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &i__3, &i__4, &nb, & + q__1, &a[i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + + nb + 1], &ldwrky, &c_b56, &a[i__ + nb + (i__ + nb) * a_dim1], + lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &q__1, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b56, &a[i__ + nb + (i__ + nb) * a_dim1], lda); -/* Test if shift negligible, and if so set to zero */ +/* Copy diagonal and off-diagonal elements of B back into A */ - if (sll > 0.) 
{ -/* Computing 2nd power */ - d__1 = shift / sll; - if (d__1 * d__1 < eps) { - shift = 0.; + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + j * a_dim1; + i__5 = j; + a[i__4].r = d__[i__5], a[i__4].i = 0.f; + i__4 = j + (j + 1) * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.f; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j + j * a_dim1; + i__5 = j; + a[i__4].r = d__[i__5], a[i__4].i = 0.f; + i__4 = j + 1 + j * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.f; +/* L20: */ } } +/* L30: */ } -/* Increment iteration count */ +/* Use unblocked code to reduce the remainder of the matrix */ - iter = iter + m - ll; + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + cgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1].r = ws, work[1].i = 0.f; + return 0; -/* If SHIFT = 0, do simplified QR iteration */ +/* End of CGEBRD */ - if (shift == 0.) 
{ - if (idir == 1) { +} /* cgebrd_ */ -/* - Chase bulge from top to bottom - Save cosines and sines for later singular vector updates -*/ +/* Subroutine */ int cgeev_(char *jobvl, char *jobvr, integer *n, complex *a, + integer *lda, complex *w, complex *vl, integer *ldvl, complex *vr, + integer *ldvr, complex *work, integer *lwork, real *rwork, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4; + real r__1, r__2; + complex q__1, q__2; - cs = 1.; - oldcs = 1.; - i__1 = m - 1; - for (i__ = ll; i__ <= i__1; ++i__) { - d__1 = d__[i__] * cs; - dlartg_(&d__1, &e[i__], &cs, &sn, &r__); - if (i__ > ll) { - e[i__ - 1] = oldsn * r__; - } - d__1 = oldcs * r__; - d__2 = d__[i__ + 1] * sn; - dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); - work[i__ - ll + 1] = cs; - work[i__ - ll + 1 + nm1] = sn; - work[i__ - ll + 1 + nm12] = oldcs; - work[i__ - ll + 1 + nm13] = oldsn; -/* L120: */ - } - h__ = d__[m] * cs; - d__[m] = h__ * oldcs; - e[m - 1] = h__ * oldsn; - -/* Update singular vectors */ - - if (*ncvt > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ - ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 - + 1], &u[ll * u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 - + 1], &c__[ll + c_dim1], ldc); - } - -/* Test convergence */ + /* Builtin functions */ + double sqrt(doublereal), r_imag(complex *); + void r_cnjg(complex *, complex *); - if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { - e[m - 1] = 0.; - } + /* Local variables */ + static integer i__, k, ihi; + static real scl; + static integer ilo; + static real dum[1], eps; + static complex tmp; + static integer ibal; + static char side[1]; + static integer maxb; + static real anrm; + static integer ierr, itau, iwrk, nout; + extern /* 
Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + extern logical lsame_(char *, char *); + extern doublereal scnrm2_(integer *, complex *, integer *); + extern /* Subroutine */ int cgebak_(char *, char *, integer *, integer *, + integer *, real *, integer *, complex *, integer *, integer *), cgebal_(char *, integer *, complex *, integer *, + integer *, integer *, real *, integer *), slabad_(real *, + real *); + static logical scalea; + extern doublereal clange_(char *, integer *, integer *, complex *, + integer *, real *); + static real cscale; + extern /* Subroutine */ int cgehrd_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *), + clascl_(char *, integer *, integer *, real *, real *, integer *, + integer *, complex *, integer *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), clacpy_(char *, integer *, integer *, complex *, integer *, + complex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical select[1]; + static real bignum; + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int chseqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *), ctrevc_(char *, + char *, logical *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, integer *, integer *, complex *, + real *, integer *), cunghr_(integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + integer *); + static integer minwrk, maxwrk; + static logical wantvl; + static real smlnum; + static integer hswork, irwork; + static logical lquery, wantvr; - } else { /* - Chase bulge from bottom to top - Save cosines and sines for later singular vector updates 
-*/ + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 - cs = 1.; - oldcs = 1.; - i__1 = ll + 1; - for (i__ = m; i__ >= i__1; --i__) { - d__1 = d__[i__] * cs; - dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__); - if (i__ < m) { - e[i__] = oldsn * r__; - } - d__1 = oldcs * r__; - d__2 = d__[i__ - 1] * sn; - dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); - work[i__ - ll] = cs; - work[i__ - ll + nm1] = -sn; - work[i__ - ll + nm12] = oldcs; - work[i__ - ll + nm13] = -oldsn; -/* L130: */ - } - h__ = d__[ll] * cs; - d__[ll] = h__ * oldcs; - e[ll] = h__ * oldsn; -/* Update singular vectors */ + Purpose + ======= - if (*ncvt > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ - nm13 + 1], &vt[ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * - u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ - ll + c_dim1], ldc); - } + CGEEV computes for an N-by-N complex nonsymmetric matrix A, the + eigenvalues and, optionally, the left and/or right eigenvectors. -/* Test convergence */ + The right eigenvector v(j) of A satisfies + A * v(j) = lambda(j) * v(j) + where lambda(j) is its eigenvalue. + The left eigenvector u(j) of A satisfies + u(j)**H * A = lambda(j) * u(j)**H + where u(j)**H denotes the conjugate transpose of u(j). - if ((d__1 = e[ll], abs(d__1)) <= thresh) { - e[ll] = 0.; - } - } - } else { + The computed eigenvectors are normalized to have Euclidean norm + equal to 1 and largest component real. -/* Use nonzero shift */ + Arguments + ========= - if (idir == 1) { + JOBVL (input) CHARACTER*1 + = 'N': left eigenvectors of A are not computed; + = 'V': left eigenvectors of are computed. 
-/* - Chase bulge from top to bottom - Save cosines and sines for later singular vector updates -*/ + JOBVR (input) CHARACTER*1 + = 'N': right eigenvectors of A are not computed; + = 'V': right eigenvectors of A are computed. - f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b15, &d__[ - ll]) + shift / d__[ll]); - g = e[ll]; - i__1 = m - 1; - for (i__ = ll; i__ <= i__1; ++i__) { - dlartg_(&f, &g, &cosr, &sinr, &r__); - if (i__ > ll) { - e[i__ - 1] = r__; - } - f = cosr * d__[i__] + sinr * e[i__]; - e[i__] = cosr * e[i__] - sinr * d__[i__]; - g = sinr * d__[i__ + 1]; - d__[i__ + 1] = cosr * d__[i__ + 1]; - dlartg_(&f, &g, &cosl, &sinl, &r__); - d__[i__] = r__; - f = cosl * e[i__] + sinl * d__[i__ + 1]; - d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; - if (i__ < m - 1) { - g = sinl * e[i__ + 1]; - e[i__ + 1] = cosl * e[i__ + 1]; - } - work[i__ - ll + 1] = cosr; - work[i__ - ll + 1 + nm1] = sinr; - work[i__ - ll + 1 + nm12] = cosl; - work[i__ - ll + 1 + nm13] = sinl; -/* L140: */ - } - e[m - 1] = f; + N (input) INTEGER + The order of the matrix A. N >= 0. -/* Update singular vectors */ + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the N-by-N matrix A. + On exit, A has been overwritten. - if (*ncvt > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ - ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 - + 1], &u[ll * u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 - + 1], &c__[ll + c_dim1], ldc); - } + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). -/* Test convergence */ + W (output) COMPLEX array, dimension (N) + W contains the computed eigenvalues. 
- if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { - e[m - 1] = 0.; - } + VL (output) COMPLEX array, dimension (LDVL,N) + If JOBVL = 'V', the left eigenvectors u(j) are stored one + after another in the columns of VL, in the same order + as their eigenvalues. + If JOBVL = 'N', VL is not referenced. + u(j) = VL(:,j), the j-th column of VL. - } else { + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1; if + JOBVL = 'V', LDVL >= N. -/* - Chase bulge from bottom to top - Save cosines and sines for later singular vector updates -*/ + VR (output) COMPLEX array, dimension (LDVR,N) + If JOBVR = 'V', the right eigenvectors v(j) are stored one + after another in the columns of VR, in the same order + as their eigenvalues. + If JOBVR = 'N', VR is not referenced. + v(j) = VR(:,j), the j-th column of VR. - f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b15, &d__[m] - ) + shift / d__[m]); - g = e[m - 1]; - i__1 = ll + 1; - for (i__ = m; i__ >= i__1; --i__) { - dlartg_(&f, &g, &cosr, &sinr, &r__); - if (i__ < m) { - e[i__] = r__; - } - f = cosr * d__[i__] + sinr * e[i__ - 1]; - e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; - g = sinr * d__[i__ - 1]; - d__[i__ - 1] = cosr * d__[i__ - 1]; - dlartg_(&f, &g, &cosl, &sinl, &r__); - d__[i__] = r__; - f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; - d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; - if (i__ > ll + 1) { - g = sinl * e[i__ - 2]; - e[i__ - 2] = cosl * e[i__ - 2]; - } - work[i__ - ll] = cosr; - work[i__ - ll + nm1] = -sinr; - work[i__ - ll + nm12] = cosl; - work[i__ - ll + nm13] = -sinl; -/* L150: */ - } - e[ll] = f; + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1; if + JOBVR = 'V', LDVR >= N. -/* Test convergence */ + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - if ((d__1 = e[ll], abs(d__1)) <= thresh) { - e[ll] = 0.; - } + LWORK (input) INTEGER + The dimension of the array WORK. 
LWORK >= max(1,2*N). + For good performance, LWORK must generally be larger. -/* Update singular vectors if desired */ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. - if (*ncvt > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ - nm13 + 1], &vt[ll + vt_dim1], ldvt); - } - if (*nru > 0) { - i__1 = m - ll + 1; - dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * - u_dim1 + 1], ldu); - } - if (*ncc > 0) { - i__1 = m - ll + 1; - dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ - ll + c_dim1], ldc); - } - } - } + RWORK (workspace) REAL array, dimension (2*N) -/* QR iteration finished, go back and check convergence */ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = i, the QR algorithm failed to compute all the + eigenvalues, and no eigenvectors have been computed; + elements and i+1:N of W contain eigenvalues which have + converged. - goto L60; + ===================================================================== -/* All singular values converged, so make them positive */ -L160: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (d__[i__] < 0.) { - d__[i__] = -d__[i__]; + Test the input arguments +*/ -/* Change sign of singular vectors, if desired */ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + --rwork; - if (*ncvt > 0) { - dscal_(ncvt, &c_b151, &vt[i__ + vt_dim1], ldvt); - } - } -/* L170: */ + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + if (! wantvl && ! 
lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -10; } /* - Sort the singular values into decreasing order (insertion sort on - singular values, but only one transposition per singular vector) + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + CWorkspace refers to complex workspace, and RWorkspace to real + workspace. NB refers to the optimal block size for the + immediately following subroutine, as returned by ILAENV. + HSWORK refers to the workspace preferred by CHSEQR, as + calculated below. HSWORK is computed assuming ILO=1 and IHI=N, + the worst case.) */ - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Scan for smallest D(I) */ - - isub = 1; - smin = d__[1]; - i__2 = *n + 1 - i__; - for (j = 2; j <= i__2; ++j) { - if (d__[j] <= smin) { - isub = j; - smin = d__[j]; - } -/* L180: */ + minwrk = 1; + if (*info == 0 && (*lwork >= 1 || lquery)) { + maxwrk = *n + *n * ilaenv_(&c__1, "CGEHRD", " ", n, &c__1, n, &c__0, ( + ftnlen)6, (ftnlen)1); + if (! wantvl && ! 
wantvr) { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "CHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "CHSEQR", "EN", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); + maxwrk = max(maxwrk,hswork); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 1; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "CUNGHR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "CHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "CHSEQR", "SV", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); +/* Computing MAX */ + i__1 = max(maxwrk,hswork), i__2 = *n << 1; + maxwrk = max(i__1,i__2); } - if (isub != *n + 1 - i__) { + work[1].r = (real) maxwrk, work[1].i = 0.f; + } + if (*lwork < minwrk && ! 
lquery) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } -/* Swap singular values and vectors */ +/* Quick return if possible */ - d__[isub] = d__[*n + 1 - i__]; - d__[*n + 1 - i__] = smin; - if (*ncvt > 0) { - dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + - vt_dim1], ldvt); - } - if (*nru > 0) { - dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * - u_dim1 + 1], &c__1); - } - if (*ncc > 0) { - dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + - c_dim1], ldc); - } - } -/* L190: */ + if (*n == 0) { + return 0; } - goto L220; -/* Maximum number of iterations exceeded, failure to converge */ +/* Get machine constants */ -L200: - *info = 0; - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.) { - ++(*info); - } -/* L210: */ + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = clange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + clascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); } -L220: - return 0; -/* End of DBDSQR */ +/* + Balance the matrix + (CWorkspace: none) + (RWorkspace: need N) +*/ -} /* dbdsqr_ */ + ibal = 1; + cgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &rwork[ibal], &ierr); -/* Subroutine */ int dgebak_(char *job, char *side, integer *n, integer *ilo, - integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * - ldv, integer *info) -{ - /* System generated locals */ - integer v_dim1, v_offset, i__1; +/* + Reduce to upper Hessenberg form + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: none) +*/ - /* Local variables */ - 
static integer i__, k; - static doublereal s; - static integer ii; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *); - static logical leftv; - extern /* Subroutine */ int xerbla_(char *, integer *); - static logical rightv; + itau = 1; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + cgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + if (wantvl) { /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - + Want left eigenvectors + Copy Householder vectors to VL +*/ - Purpose - ======= + *(unsigned char *)side = 'L'; + clacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; - DGEBAK forms the right or left eigenvectors of a real general matrix - by backward transformation on the computed eigenvectors of the - balanced matrix output by DGEBAL. +/* + Generate unitary matrix in VL + (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) + (RWorkspace: none) +*/ - Arguments - ========= + i__1 = *lwork - iwrk + 1; + cunghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); - JOB (input) CHARACTER*1 - Specifies the type of backward transformation required: - = 'N', do nothing, return immediately; - = 'P', do backward transformation for permutation only; - = 'S', do backward transformation for scaling only; - = 'B', do backward transformations for both permutation and - scaling. - JOB must be the same as the argument JOB supplied to DGEBAL. +/* + Perform QR iteration, accumulating Schur vectors in VL + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ - SIDE (input) CHARACTER*1 - = 'R': V contains right eigenvectors; - = 'L': V contains left eigenvectors. 
+ iwrk = itau; + i__1 = *lwork - iwrk + 1; + chseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vl[ + vl_offset], ldvl, &work[iwrk], &i__1, info); - N (input) INTEGER - The number of rows of the matrix V. N >= 0. + if (wantvr) { - ILO (input) INTEGER - IHI (input) INTEGER - The integers ILO and IHI determined by DGEBAL. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +/* + Want left and right eigenvectors + Copy Schur vectors to VR +*/ - SCALE (input) DOUBLE PRECISION array, dimension (N) - Details of the permutation and scaling factors, as returned - by DGEBAL. + *(unsigned char *)side = 'B'; + clacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } - M (input) INTEGER - The number of columns of the matrix V. M >= 0. + } else if (wantvr) { - V (input/output) DOUBLE PRECISION array, dimension (LDV,M) - On entry, the matrix of right or left eigenvectors to be - transformed, as returned by DHSEIN or DTREVC. - On exit, V is overwritten by the transformed eigenvectors. - - LDV (input) INTEGER - The leading dimension of the array V. LDV >= max(1,N). +/* + Want right eigenvectors + Copy Householder vectors to VR +*/ - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. 
+ *(unsigned char *)side = 'R'; + clacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; - ===================================================================== +/* + Generate unitary matrix in VR + (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) + (RWorkspace: none) +*/ + i__1 = *lwork - iwrk + 1; + cunghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); - Decode and Test the input parameters +/* + Perform QR iteration, accumulating Schur vectors in VR + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) */ - /* Parameter adjustments */ - --scale; - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; + iwrk = itau; + i__1 = *lwork - iwrk + 1; + chseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); - /* Function Body */ - rightv = lsame_(side, "R"); - leftv = lsame_(side, "L"); + } else { - *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! lsame_(job, "B")) { - *info = -1; - } else if (! rightv && ! 
leftv) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -4; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -5; - } else if (*m < 0) { - *info = -7; - } else if (*ldv < max(1,*n)) { - *info = -9; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEBAK", &i__1); - return 0; +/* + Compute eigenvalues only + (CWorkspace: need 1, prefer HSWORK (see comments) ) + (RWorkspace: none) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + chseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[iwrk], &i__1, info); } -/* Quick return if possible */ +/* If INFO > 0 from CHSEQR, then quit */ - if (*n == 0) { - return 0; - } - if (*m == 0) { - return 0; - } - if (lsame_(job, "N")) { - return 0; + if (*info > 0) { + goto L50; } - if (*ilo == *ihi) { - goto L30; + if (wantvl || wantvr) { + +/* + Compute left and/or right eigenvectors + (CWorkspace: need 2*N) + (RWorkspace: need 2*N) +*/ + + irwork = ibal + *n; + ctrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &rwork[irwork], + &ierr); } -/* Backward balance */ + if (wantvl) { - if (lsame_(job, "S") || lsame_(job, "B")) { +/* + Undo balancing of left eigenvectors + (CWorkspace: none) + (RWorkspace: need N) +*/ - if (rightv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) { - s = scale[i__]; - dscal_(m, &s, &v[i__ + v_dim1], ldv); + cgebak_("B", "L", n, &ilo, &ihi, &rwork[ibal], n, &vl[vl_offset], + ldvl, &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scl = 1.f / scnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + csscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * vl_dim1; +/* Computing 2nd power */ + r__1 = vl[i__3].r; +/* Computing 2nd power */ + r__2 = r_imag(&vl[k + i__ * vl_dim1]); + rwork[irwork + k - 1] = 
r__1 * r__1 + r__2 * r__2; /* L10: */ } - } - - if (leftv) { - i__1 = *ihi; - for (i__ = *ilo; i__ <= i__1; ++i__) { - s = 1. / scale[i__]; - dscal_(m, &s, &v[i__ + v_dim1], ldv); + k = isamax_(n, &rwork[irwork], &c__1); + r_cnjg(&q__2, &vl[k + i__ * vl_dim1]); + r__1 = sqrt(rwork[irwork + k - 1]); + q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1; + tmp.r = q__1.r, tmp.i = q__1.i; + cscal_(n, &tmp, &vl[i__ * vl_dim1 + 1], &c__1); + i__2 = k + i__ * vl_dim1; + i__3 = k + i__ * vl_dim1; + r__1 = vl[i__3].r; + q__1.r = r__1, q__1.i = 0.f; + vl[i__2].r = q__1.r, vl[i__2].i = q__1.i; /* L20: */ - } } - } -/* - Backward permutation + if (wantvr) { - For I = ILO-1 step -1 until 1, - IHI+1 step 1 until N do -- +/* + Undo balancing of right eigenvectors + (CWorkspace: none) + (RWorkspace: need N) */ -L30: - if (lsame_(job, "P") || lsame_(job, "B")) { - if (rightv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L40; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = (integer) scale[i__]; - if (k == i__) { - goto L40; - } - dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); -L40: - ; + cgebak_("B", "R", n, &ilo, &ihi, &rwork[ibal], n, &vr[vr_offset], + ldvr, &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scl = 1.f / scnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + csscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { + i__3 = k + i__ * vr_dim1; +/* Computing 2nd power */ + r__1 = vr[i__3].r; +/* Computing 2nd power */ + r__2 = r_imag(&vr[k + i__ * vr_dim1]); + rwork[irwork + k - 1] = r__1 * r__1 + r__2 * r__2; +/* L30: */ } + k = isamax_(n, &rwork[irwork], &c__1); + r_cnjg(&q__2, &vr[k + i__ * vr_dim1]); + r__1 = sqrt(rwork[irwork + k - 1]); + q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1; + tmp.r = q__1.r, tmp.i = q__1.i; + cscal_(n, &tmp, &vr[i__ * vr_dim1 + 1], &c__1); + i__2 = k + i__ * 
vr_dim1; + i__3 = k + i__ * vr_dim1; + r__1 = vr[i__3].r; + q__1.r = r__1, q__1.i = 0.f; + vr[i__2].r = q__1.r, vr[i__2].i = q__1.i; +/* L40: */ } + } + +/* Undo scaling if necessary */ - if (leftv) { - i__1 = *n; - for (ii = 1; ii <= i__1; ++ii) { - i__ = ii; - if (i__ >= *ilo && i__ <= *ihi) { - goto L50; - } - if (i__ < *ilo) { - i__ = *ilo - ii; - } - k = (integer) scale[i__]; - if (k == i__) { - goto L50; - } - dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); L50: - ; - } + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + clascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[*info + 1] + , &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + clascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[1], n, + &ierr); } } + work[1].r = (real) maxwrk, work[1].i = 0.f; return 0; -/* End of DGEBAK */ +/* End of CGEEV */ -} /* dgebak_ */ +} /* cgeev_ */ -/* Subroutine */ int dgebal_(char *job, integer *n, doublereal *a, integer * - lda, integer *ilo, integer *ihi, doublereal *scale, integer *info) +/* Subroutine */ int cgehd2_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal d__1, d__2; + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; - /* Local variables */ - static doublereal c__, f, g; - static integer i__, j, k, l, m; - static doublereal r__, s, ca, ra; - static integer ica, ira, iexc; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *); - static doublereal sfmin1, sfmin2, sfmax1, sfmax2; + /* Builtin functions */ + void r_cnjg(complex *, complex *); - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int xerbla_(char *, 
integer *); - static logical noconv; + /* Local variables */ + static integer i__; + static complex alpha; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + clarfg_(integer *, complex *, complex *, integer *, complex *), + xerbla_(char *, integer *); /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + September 30, 1994 Purpose ======= - DGEBAL balances a general real matrix A. This involves, first, - permuting A by a similarity transformation to isolate eigenvalues - in the first 1 to ILO-1 and last IHI+1 to N elements on the - diagonal; and second, applying a diagonal similarity transformation - to rows and columns ILO to IHI to make the rows and columns as - close in norm as possible. Both steps are optional. - - Balancing may reduce the 1-norm of the matrix, and improve the - accuracy of the computed eigenvalues and/or eigenvectors. + CGEHD2 reduces a complex general matrix A to upper Hessenberg form H + by a unitary similarity transformation: Q' * A * Q = H . Arguments ========= - JOB (input) CHARACTER*1 - Specifies the operations to be performed on A: - = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 - for i = 1,...,N; - = 'P': permute only; - = 'S': scale only; - = 'B': both permute and scale. - N (input) INTEGER The order of the matrix A. N >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the input matrix A. - On exit, A is overwritten by the balanced matrix. - If JOB = 'N', A is not referenced. - See Further Details. + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to CGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. 
+ 1 <= ILO <= IHI <= max(1,N). + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the n by n general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the unitary matrix Q as a product of elementary + reflectors. See Further Details. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). - ILO (output) INTEGER - IHI (output) INTEGER - ILO and IHI are set to integers such that on exit - A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. - If JOB = 'N' or 'S', ILO = 1 and IHI = N. + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). - SCALE (output) DOUBLE PRECISION array, dimension (N) - Details of the permutations and scaling factors applied to - A. If P(j) is the index of the row and column interchanged - with row and column j and D(j) is the scaling factor - applied to row and column j, then - SCALE(j) = P(j) for j = 1,...,ILO-1 - = D(j) for j = ILO,...,IHI - = P(j) for j = IHI+1,...,N. - The order in which the interchanges are made is N to IHI+1, - then 1 to ILO-1. + WORK (workspace) COMPLEX array, dimension (N) INFO (output) INTEGER - = 0: successful exit. + = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== - The permutations consist of row and column interchanges which put - the matrix in the form + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors - ( T1 X Y ) - P A P = ( 0 B Z ) - ( 0 0 T2 ) + Q = H(ilo) H(ilo+1) . . . H(ihi-1). - where T1 and T2 are upper triangular matrices whose eigenvalues lie - along the diagonal. The column indices ILO and IHI mark the starting - and ending columns of the submatrix B. 
Balancing consists of applying - a diagonal similarity transformation inv(D) * B * D to make the - 1-norms of each row of B and its corresponding column nearly equal. - The output matrix is + Each H(i) has the form - ( T1 X*D Y ) - ( 0 inv(D)*B*D inv(D)*Z ). - ( 0 0 T2 ) + H(i) = I - tau * v * v' - Information about the permutations P and the diagonal matrix D is - returned in the vector SCALE. + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). - This subroutine is based on the EISPACK routine BALANC. + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: - Modified by Tzu-Yi Chen, Computer Science Division, University of - California at Berkeley, USA + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). ===================================================================== @@ -1769,398 +1938,450 @@ L50: a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; - --scale; + --tau; + --work; /* Function Body */ *info = 0; - if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") - && ! 
lsame_(job, "B")) { + if (*n < 0) { *info = -1; - } else if (*n < 0) { + } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; } else if (*lda < max(1,*n)) { - *info = -4; + *info = -5; } if (*info != 0) { i__1 = -(*info); - xerbla_("DGEBAL", &i__1); + xerbla_("CGEHD2", &i__1); return 0; } - k = 1; - l = *n; + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { - if (*n == 0) { - goto L210; - } +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ - if (lsame_(job, "N")) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - scale[i__] = 1.; + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *ihi - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[ + i__]); + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + clarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i)' to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + r_cnjg(&q__1, &tau[i__]); + clarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &q__1, + &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; /* L10: */ - } - goto L210; } - if (lsame_(job, "S")) { - goto L120; - } + return 0; -/* Permutation to isolate eigenvalues if possible */ +/* End of CGEHD2 */ - goto L50; +} /* cgehd2_ */ -/* Row and column exchange. 
*/ +/* Subroutine */ int cgehrd_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *lwork, integer + *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + complex q__1; -L20: - scale[m] = (doublereal) j; - if (j == m) { - goto L30; - } + /* Local variables */ + static integer i__; + static complex t[4160] /* was [65][64] */; + static integer ib; + static complex ei; + static integer nb, nh, nx, iws; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + static integer nbmin, iinfo; + extern /* Subroutine */ int cgehd2_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *), clarfb_( + char *, char *, char *, char *, integer *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *, + complex *, integer *), clahrd_( + integer *, integer *, integer *, complex *, integer *, complex *, + complex *, integer *, complex *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; - dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); - i__1 = *n - k + 1; - dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); -L30: - switch (iexc) { - case 1: goto L40; - case 2: goto L80; - } +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 -/* Search for rows isolating an eigenvalue and push them down. 
*/ -L40: - if (l == 1) { - goto L210; - } - --l; + Purpose + ======= -L50: - for (j = l; j >= 1; --j) { + CGEHRD reduces a complex general matrix A to upper Hessenberg form H + by a unitary similarity transformation: Q' * A * Q = H . - i__1 = l; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ == j) { - goto L60; - } - if (a[j + i__ * a_dim1] != 0.) { - goto L70; - } -L60: - ; - } + Arguments + ========= - m = l; - iexc = 1; - goto L20; -L70: - ; - } + N (input) INTEGER + The order of the matrix A. N >= 0. - goto L90; + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to CGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. -/* Search for columns isolating an eigenvalue and push them left. */ + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the N-by-N general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the unitary matrix Q as a product of elementary + reflectors. See Further Details. -L80: - ++k; + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). -L90: - i__1 = l; - for (j = k; j <= i__1; ++j) { + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to + zero. - i__2 = l; - for (i__ = k; i__ <= i__2; ++i__) { - if (i__ == j) { - goto L100; - } - if (a[i__ + j * a_dim1] != 0.) { - goto L110; - } -L100: - ; - } + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
- m = k; - iexc = 2; - goto L20; -L110: - ; - } + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. -L120: - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - scale[i__] = 1.; -/* L130: */ - } + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. - if (lsame_(job, "P")) { - goto L210; - } + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. -/* - Balance the submatrix in rows K to L. + Further Details + =============== - Iterative loop for norm reduction -*/ + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors - sfmin1 = SAFEMINIMUM / PRECISION; - sfmax1 = 1. / sfmin1; - sfmin2 = sfmin1 * 8.; - sfmax2 = 1. / sfmin2; -L140: - noconv = FALSE_; + Q = H(ilo) H(ilo+1) . . . H(ihi-1). - i__1 = l; - for (i__ = k; i__ <= i__1; ++i__) { - c__ = 0.; - r__ = 0.; + Each H(i) has the form - i__2 = l; - for (j = k; j <= i__2; ++j) { - if (j == i__) { - goto L150; - } - c__ += (d__1 = a[j + i__ * a_dim1], abs(d__1)); - r__ += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -L150: - ; - } - ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1); - ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); - i__2 = *n - k + 1; - ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda); - ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); + H(i) = I - tau * v * v' -/* Guard against zero C or R due to underflow. */ + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). - if (c__ == 0. || r__ == 0.) 
{ - goto L200; - } - g = r__ / 8.; - f = 1.; - s = c__ + r__; -L160: -/* Computing MAX */ - d__1 = max(f,c__); -/* Computing MIN */ - d__2 = min(r__,g); - if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { - goto L170; - } - f *= 8.; - c__ *= 8.; - ca *= 8.; - r__ /= 8.; - g /= 8.; - ra /= 8.; - goto L160; + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: -L170: - g = c__ / 8.; -L180: + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; /* Computing MIN */ - d__1 = min(f,c__), d__1 = min(d__1,g); - if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { - goto L190; - } - f /= 8.; - c__ /= 8.; - g /= 8.; - ca /= 8.; - r__ *= 8.; - ra *= 8.; - goto L180; + i__1 = 64, i__2 = ilaenv_(&c__1, "CGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + lwkopt = *n * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEHRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } -/* Now balance. 
*/ +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ -L190: - if (c__ + r__ >= s * .95) { - goto L200; - } - if (f < 1. && scale[i__] < 1.) { - if (f * scale[i__] <= sfmin1) { - goto L200; - } - } - if (f > 1. && scale[i__] > 1.) { - if (scale[i__] >= sfmax1 / f) { - goto L200; + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + tau[i__2].r = 0.f, tau[i__2].i = 0.f; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { + i__2 = i__; + tau[i__2].r = 0.f, tau[i__2].i = 0.f; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + iws = 1; + if (nb > 1 && nb < nh) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "CGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code. */ + + iws = *n * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code. + + Computing MAX +*/ + i__1 = 2, i__2 = ilaenv_(&c__2, "CGEHRD", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + if (*lwork >= *n * nbmin) { + nb = *lwork / *n; + } else { + nb = 1; + } } } - g = 1. / f; - scale[i__] *= f; - noconv = TRUE_; + } + ldwork = *n; - i__2 = *n - k + 1; - dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); - dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + if (nb < nbmin || nb >= nh) { -L200: - ; - } +/* Use unblocked code below */ - if (noconv) { - goto L140; + i__ = *ilo; + + } else { + +/* Use blocked code */ + + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3,i__4); + +/* + Reduce columns i:i+ib-1 to Hessenberg form, returning the + matrices V and T of the block reflector H = I - V*T*V' + which performs the reduction, and also the matrix Y = A*V*T +*/ + + clahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + c__65, &work[1], &ldwork); + +/* + Apply the block reflector H to A(1:ihi,i+ib:ihi) from the + right, computing A := A - Y * V'. V(i+ib,ib-1) must be set + to 1. +*/ + + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + ei.r = a[i__3].r, ei.i = a[i__3].i; + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + a[i__3].r = 1.f, a[i__3].i = 0.f; + i__3 = *ihi - i__ - ib + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, & + q__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, + &c_b56, &a[(i__ + ib) * a_dim1 + 1], lda); + i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; + a[i__3].r = ei.r, a[i__3].i = ei.i; + +/* + Apply the block reflector H to A(i+1:ihi,i+ib:n) from the + left +*/ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise", & + i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, & + c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], & + ldwork); +/* L30: */ + } } -L210: - *ilo = k; - *ihi = l; +/* Use unblocked code to reduce the rest of the matrix */ + + cgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1].r = (real) iws, work[1].i = 0.f; return 0; -/* End of DGEBAL */ +/* End of CGEHRD */ -} /* dgebal_ */ +} /* cgehrd_ */ -/* Subroutine */ int dgebd2_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * - taup, doublereal *work, integer *info) +/* Subroutine */ int cgelq2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info) { /* System 
generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ - static integer i__; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + static integer i__, k; + static complex alpha; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + clarfg_(integer *, complex *, complex *, integer *, complex *), + clacgv_(integer *, complex *, integer *), xerbla_(char *, integer + *); /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + September 30, 1994 Purpose ======= - DGEBD2 reduces a real general m by n matrix A to upper or lower - bidiagonal form B by an orthogonal transformation: Q' * A * P = B. - - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + CGELQ2 computes an LQ factorization of a complex m by n matrix A: + A = L * Q. Arguments ========= M (input) INTEGER - The number of rows in the matrix A. M >= 0. + The number of rows of the matrix A. M >= 0. N (input) INTEGER - The number of columns in the matrix A. N >= 0. + The number of columns of the matrix A. N >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. 
- On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the orthogonal matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the orthogonal matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors. - See Further Details. + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and below the diagonal of the array + contain the m by min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). - D (output) DOUBLE PRECISION array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - - TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix Q. See Further Details. - - TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix P. See Further Details. 
+ TAU (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). - WORK (workspace) DOUBLE PRECISION array, dimension (max(M,N)) + WORK (workspace) COMPLEX array, dimension (M) INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== - The matrices Q and P are represented as products of elementary - reflectors: - - If m >= n, - - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); - u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, - - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); - u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). + The matrix Q is represented as a product of elementary reflectors - The contents of A on exit are illustrated by the following examples: + Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). 
- m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + Each H(i) has the form - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) + H(i) = I - tau * v * v' - where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + A(i,i+1:n), and tau in TAU(i). ===================================================================== - Test the input parameters + Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; - --d__; - --e; - --tauq; - --taup; + --tau; --work; /* Function Body */ @@ -2172,150 +2393,69 @@ L210: } else if (*lda < max(1,*m)) { *info = -4; } - if (*info < 0) { + if (*info != 0) { i__1 = -(*info); - xerbla_("DGEBD2", &i__1); + xerbla_("CGELQ2", &i__1); return 0; } - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ - - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * - a_dim1], &c__1, &tauq[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - -/* Apply H(i) to A(i:m,i+1:n) from the left */ - - i__2 = *m - i__ + 1; - i__3 = *n - i__; - dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = d__[i__]; - - if (i__ < *n) { - -/* - Generate elementary reflector G(i) to annihilate - A(i,i+2:n) -*/ - - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ 
+ min( - i__3,*n) * a_dim1], lda, &taup[i__]); - e[i__] = a[i__ + (i__ + 1) * a_dim1]; - a[i__ + (i__ + 1) * a_dim1] = 1.; - -/* Apply G(i) to A(i+1:m,i+1:n) from the right */ - - i__2 = *m - i__; - i__3 = *n - i__; - dlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], - lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &work[1]); - a[i__ + (i__ + 1) * a_dim1] = e[i__]; - } else { - taup[i__] = 0.; - } -/* L10: */ - } - } else { - -/* Reduce to lower bidiagonal form */ + k = min(*m,*n); - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { -/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ - i__2 = *n - i__ + 1; + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; /* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * - a_dim1], lda, &taup[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &tau[i__] + ); + if (i__ < *m) { -/* Apply G(i) to A(i+1:m,i:n) from the right */ +/* Apply H(i) to A(i+1:m,i:n) from the right */ + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; i__2 = *m - i__; i__3 = *n - i__ + 1; -/* Computing MIN */ - i__4 = i__ + 1; - dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ - i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = d__[i__]; - - if (i__ < *m) { - -/* - Generate elementary reflector H(i) to annihilate - A(i+2:m,i) -*/ - - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + - i__ * a_dim1], &c__1, &tauq[i__]); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.; - -/* Apply H(i) to 
A(i+1:m,i+1:n) from the left */ - - i__2 = *m - i__; - i__3 = *n - i__; - dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & - c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &work[1]); - a[i__ + 1 + i__ * a_dim1] = e[i__]; - } else { - tauq[i__] = 0.; - } -/* L20: */ + clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); } + i__2 = i__ + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); +/* L10: */ } return 0; -/* End of DGEBD2 */ +/* End of CGELQ2 */ -} /* dgebd2_ */ +} /* cgelq2_ */ -/* Subroutine */ int dgebrd_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * - taup, doublereal *work, integer *lwork, integer *info) +/* Subroutine */ int cgelqf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ - static integer i__, j, nb, nx; - static doublereal ws; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer nbmin, iinfo, minmn; - extern /* Subroutine */ int dgebd2_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *), dlabrd_(integer *, integer *, integer * - , doublereal *, integer *, doublereal *, doublereal *, doublereal - *, doublereal *, doublereal *, integer *, doublereal *, integer *) - , xerbla_(char *, integer *); + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int cgelq2_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *), clarfb_(char *, char + *, char *, char *, integer *, 
integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - static integer ldwrkx, ldwrky, lwkopt; + static integer ldwork, lwkopt; static logical lquery; @@ -2329,66 +2469,40 @@ L210: Purpose ======= - DGEBRD reduces a general real M-by-N matrix A to upper or lower - bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. - - If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + CGELQF computes an LQ factorization of a complex M-by-N matrix A: + A = L * Q. Arguments ========= M (input) INTEGER - The number of rows in the matrix A. M >= 0. + The number of rows of the matrix A. M >= 0. N (input) INTEGER - The number of columns in the matrix A. N >= 0. + The number of columns of the matrix A. N >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N general matrix to be reduced. - On exit, - if m >= n, the diagonal and the first superdiagonal are - overwritten with the upper bidiagonal matrix B; the - elements below the diagonal, with the array TAUQ, represent - the orthogonal matrix Q as a product of elementary - reflectors, and the elements above the first superdiagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors; - if m < n, the diagonal and the first subdiagonal are - overwritten with the lower bidiagonal matrix B; the - elements below the first subdiagonal, with the array TAUQ, - represent the orthogonal matrix Q as a product of - elementary reflectors, and the elements above the diagonal, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors. - See Further Details. 
+ A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and below the diagonal of the array + contain the m-by-min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). - D (output) DOUBLE PRECISION array, dimension (min(M,N)) - The diagonal elements of the bidiagonal matrix B: - D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) - The off-diagonal elements of the bidiagonal matrix B: - if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; - if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. - - TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix Q. See Further Details. - - TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix P. See Further Details. + TAU (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,M,N). - For optimum performance LWORK >= (M+N)*NB, where NB - is the optimal blocksize. + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is the + optimal blocksize. 
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns @@ -2397,79 +2511,42 @@ L210: INFO (output) INTEGER = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. + < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== - The matrices Q and P are represented as products of elementary - reflectors: - - If m >= n, - - Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); - u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). - - If m < n, - - Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) - - Each H(i) and G(i) has the form: - - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' - - where tauq and taup are real scalars, and v and u are real vectors; - v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); - u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); - tauq is stored in TAUQ(i) and taup in TAUP(i). + The matrix Q is represented as a product of elementary reflectors - The contents of A on exit are illustrated by the following examples: + Q = H(k)' . . . H(2)' H(1)', where k = min(m,n). - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + Each H(i) has the form - ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) - ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) - ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) - ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) - ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) - ( v1 v2 v3 v4 v5 ) + H(i) = I - tau * v * v' - where d and e denote diagonal and off-diagonal elements of B, vi - denotes an element of the vector defining H(i), and ui an element of - the vector defining G(i). 
+ where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in + A(i,i+1:n), and tau in TAU(i). ===================================================================== - Test the input parameters + Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; - --d__; - --e; - --tauq; - --taup; + --tau; --work; /* Function Body */ *info = 0; -/* Computing MAX */ - i__1 = 1, i__2 = ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = max(i__1,i__2); - lwkopt = (*m + *n) * nb; - work[1] = (doublereal) lwkopt; + nb = ilaenv_(&c__1, "CGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; lquery = *lwork == -1; if (*m < 0) { *info = -1; @@ -2477,16 +2554,12 @@ L210: *info = -2; } else if (*lda < max(1,*m)) { *info = -4; - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = max(1,*m); - if (*lwork < max(i__1,*n) && ! lquery) { - *info = -10; - } + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; } - if (*info < 0) { + if (*info != 0) { i__1 = -(*info); - xerbla_("DGEBRD", &i__1); + xerbla_("CGELQF", &i__1); return 0; } else if (lquery) { return 0; @@ -2494,290 +2567,184 @@ L210: /* Quick return if possible */ - minmn = min(*m,*n); - if (minmn == 0) { - work[1] = 1.; + k = min(*m,*n); + if (k == 0) { + work[1].r = 1.f, work[1].i = 0.f; return 0; } - ws = (doublereal) max(*m,*n); - ldwrkx = *m; - ldwrky = *n; - - if (nb > 1 && nb < minmn) { + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { /* - Set the crossover point NX. + Determine when to cross over from blocked to unblocked code. 
Computing MAX */ - i__1 = nb, i__2 = ilaenv_(&c__3, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( + i__1 = 0, i__2 = ilaenv_(&c__3, "CGELQF", " ", m, n, &c_n1, &c_n1, ( ftnlen)6, (ftnlen)1); nx = max(i__1,i__2); + if (nx < k) { -/* Determine when to switch from blocked to unblocked code. */ +/* Determine if workspace is large enough for blocked code. */ - if (nx < minmn) { - ws = (doublereal) ((*m + *n) * nb); - if ((doublereal) (*lwork) < ws) { + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { /* - Not enough work space for the optimal NB, consider using - a smaller block size. + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. */ - nbmin = ilaenv_(&c__2, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - if (*lwork >= (*m + *n) * nbmin) { - nb = *lwork / (*m + *n); - } else { - nb = 1; - nx = minmn; - } + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CGELQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); } } - } else { - nx = minmn; } - i__1 = minmn - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); /* - Reduce rows and columns i:i+nb-1 to bidiagonal form and return - the matrices X and Y which are needed to update the unreduced - part of the matrix + Compute the LQ factorization of the current block + A(i:i+ib-1,i:n) */ - i__3 = *m - i__ + 1; - i__4 = *n - i__ + 1; - dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ - i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx - * nb + 1], &ldwrky); + i__3 = *n - i__ + 1; + cgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { /* - Update the trailing submatrix A(i+nb:m,i+nb:n), using an update - of the form A := A - V*Y' - X*U' + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) */ - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - dgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b151, &a[ - i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & - ldwrky, &c_b15, &a[i__ + nb + (i__ + nb) * a_dim1], lda); - i__3 = *m - i__ - nb + 1; - i__4 = *n - i__ - nb + 1; - dgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b151, & - work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & - c_b15, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + i__3 = *n - i__ + 1; + clarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); -/* Copy diagonal and off-diagonal elements of B back into A */ +/* Apply H to A(i+ib:m,i:n) from the right */ - if (*m >= *n) { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j + j * a_dim1] = d__[j]; - a[j + (j + 1) * a_dim1] = e[j]; -/* L10: */ - } - } else { - i__3 = i__ + nb - 1; - for (j = i__; j <= i__3; ++j) { - a[j + j * a_dim1] = d__[j]; - a[j + 1 + j * a_dim1] = e[j]; -/* L20: */ + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + clarfb_("Right", "No transpose", "Forward", "Rowwise", 
&i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); } +/* L10: */ } -/* L30: */ + } else { + i__ = 1; } -/* Use unblocked code to reduce the remainder of the matrix */ +/* Use unblocked code to factor the last or only block. */ - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & - tauq[i__], &taup[i__], &work[1], &iinfo); - work[1] = ws; + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + cgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1].r = (real) iws, work[1].i = 0.f; return 0; -/* End of DGEBRD */ +/* End of CGELQF */ -} /* dgebrd_ */ +} /* cgelqf_ */ -/* Subroutine */ int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * - a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, - integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, - integer *lwork, integer *info) +/* Subroutine */ int cgeqr2_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4; - doublereal d__1, d__2; + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; /* Builtin functions */ - double sqrt(doublereal); + void r_cnjg(complex *, complex *); /* Local variables */ static integer i__, k; - static doublereal r__, cs, sn; - static integer ihi; - static doublereal scl; - static integer ilo; - static doublereal dum[1], eps; - static integer ibal; - static char side[1]; - static integer maxb; - static doublereal anrm; - static integer ierr, itau; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer iwrk, nout; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ 
int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern doublereal dlapy2_(doublereal *, doublereal *); - extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_( - char *, char *, integer *, integer *, integer *, doublereal *, - integer *, doublereal *, integer *, integer *), - dgebal_(char *, integer *, doublereal *, integer *, integer *, - integer *, doublereal *, integer *); - static logical scalea; - - static doublereal cscale; - extern doublereal dlange_(char *, integer *, integer *, doublereal *, - integer *, doublereal *); - extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *), dlascl_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, integer *, doublereal *, integer *, - integer *); - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *), - dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *), xerbla_(char *, integer *); - static logical select[1]; - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static doublereal bignum; - extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *), dhseqr_(char *, char *, integer *, integer *, integer - *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, integer *, doublereal *, integer *, integer *), dtrevc_(char *, char *, logical *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, doublereal *, integer *); - static integer minwrk, maxwrk; - static logical wantvl; - static doublereal smlnum; - static integer hswork; - static logical lquery, 
wantvr; + static complex alpha; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + clarfg_(integer *, complex *, complex *, integer *, complex *), + xerbla_(char *, integer *); /* - -- LAPACK driver routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - December 8, 1999 + September 30, 1994 Purpose ======= - DGEEV computes for an N-by-N real nonsymmetric matrix A, the - eigenvalues and, optionally, the left and/or right eigenvectors. - - The right eigenvector v(j) of A satisfies - A * v(j) = lambda(j) * v(j) - where lambda(j) is its eigenvalue. - The left eigenvector u(j) of A satisfies - u(j)**H * A = lambda(j) * u(j)**H - where u(j)**H denotes the conjugate transpose of u(j). - - The computed eigenvectors are normalized to have Euclidean norm - equal to 1 and largest component real. + CGEQR2 computes a QR factorization of a complex m by n matrix A: + A = Q * R. Arguments ========= - JOBVL (input) CHARACTER*1 - = 'N': left eigenvectors of A are not computed; - = 'V': left eigenvectors of A are computed. - - JOBVR (input) CHARACTER*1 - = 'N': right eigenvectors of A are not computed; - = 'V': right eigenvectors of A are computed. + M (input) INTEGER + The number of rows of the matrix A. M >= 0. N (input) INTEGER - The order of the matrix A. N >= 0. + The number of columns of the matrix A. N >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the N-by-N matrix A. - On exit, A has been overwritten. + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n matrix A. 
+ On exit, the elements on and above the diagonal of the array + contain the min(m,n) by n upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors (see Further Details). LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). + The leading dimension of the array A. LDA >= max(1,M). - WR (output) DOUBLE PRECISION array, dimension (N) - WI (output) DOUBLE PRECISION array, dimension (N) - WR and WI contain the real and imaginary parts, - respectively, of the computed eigenvalues. Complex - conjugate pairs of eigenvalues appear consecutively - with the eigenvalue having the positive imaginary part - first. + TAU (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). - VL (output) DOUBLE PRECISION array, dimension (LDVL,N) - If JOBVL = 'V', the left eigenvectors u(j) are stored one - after another in the columns of VL, in the same order - as their eigenvalues. - If JOBVL = 'N', VL is not referenced. - If the j-th eigenvalue is real, then u(j) = VL(:,j), - the j-th column of VL. - If the j-th and (j+1)-st eigenvalues form a complex - conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and - u(j+1) = VL(:,j) - i*VL(:,j+1). + WORK (workspace) COMPLEX array, dimension (N) - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= 1; if - JOBVL = 'V', LDVL >= N. + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value - VR (output) DOUBLE PRECISION array, dimension (LDVR,N) - If JOBVR = 'V', the right eigenvectors v(j) are stored one - after another in the columns of VR, in the same order - as their eigenvalues. - If JOBVR = 'N', VR is not referenced. - If the j-th eigenvalue is real, then v(j) = VR(:,j), - the j-th column of VR. 
- If the j-th and (j+1)-st eigenvalues form a complex - conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and - v(j+1) = VR(:,j) - i*VR(:,j+1). + Further Details + =============== - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= 1; if - JOBVR = 'V', LDVR >= N. + The matrix Q is represented as a product of elementary reflectors - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + Q = H(1) H(2) . . . H(k), where k = min(m,n). - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,3*N), and - if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good - performance, LWORK must generally be larger. + Each H(i) has the form - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. + H(i) = I - tau * v * v' - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = i, the QR algorithm failed to compute all the - eigenvalues, and no eigenvectors have been computed; - elements i+1:N of WR and WI contain eigenvalues which - have converged. + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). ===================================================================== @@ -2789,5211 +2756,6005 @@ L210: a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; - --wr; - --wi; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; + --tau; --work; /* Function Body */ *info = 0; - lquery = *lwork == -1; - wantvl = lsame_(jobvl, "V"); - wantvr = lsame_(jobvr, "V"); - if (! wantvl && ! lsame_(jobvl, "N")) { + if (*m < 0) { *info = -1; - } else if (! wantvr && ! 
lsame_(jobvr, "N")) { - *info = -2; } else if (*n < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldvl < 1 || wantvl && *ldvl < *n) { - *info = -9; - } else if (*ldvr < 1 || wantvr && *ldvr < *n) { - *info = -11; - } - -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - NB refers to the optimal block size for the immediately - following subroutine, as returned by ILAENV. - HSWORK refers to the workspace preferred by DHSEQR, as - calculated below. HSWORK is computed assuming ILO=1 and IHI=N, - the worst case.) -*/ - - minwrk = 1; - if (*info == 0 && (*lwork >= 1 || lquery)) { - maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, & - c__0, (ftnlen)6, (ftnlen)1); - if (! wantvl && ! wantvr) { -/* Computing MAX */ - i__1 = 1, i__2 = *n * 3; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "DHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "EN", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + - hswork; - maxwrk = max(i__1,i__2); - } else { -/* Computing MAX */ - i__1 = 1, i__2 = *n << 2; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "DOR" - "GHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "DHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, 
"DHSEQR", "SV", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + - hswork; - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n << 2; - maxwrk = max(i__1,i__2); - } - work[1] = (doublereal) maxwrk; - } - if (*lwork < minwrk && ! lquery) { - *info = -13; + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; } if (*info != 0) { i__1 = -(*info); - xerbla_("DGEEV ", &i__1); - return 0; - } else if (lquery) { + xerbla_("CGEQR2", &i__1); return 0; } -/* Quick return if possible */ + k = min(*m,*n); - if (*n == 0) { - return 0; - } + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { -/* Get machine constants */ +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ - eps = PRECISION; - smlnum = SAFEMINIMUM; - bignum = 1. / smlnum; - dlabad_(&smlnum, &bignum); - smlnum = sqrt(smlnum) / eps; - bignum = 1. / smlnum; + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ +/* Apply H(i)' to A(i:m,i+1:n) from the left */ - anrm = dlange_("M", n, n, &a[a_offset], lda, dum); - scalea = FALSE_; - if (anrm > 0. 
&& anrm < smlnum) { - scalea = TRUE_; - cscale = smlnum; - } else if (anrm > bignum) { - scalea = TRUE_; - cscale = bignum; - } - if (scalea) { - dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & - ierr); + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + r_cnjg(&q__1, &tau[i__]); + clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &q__1, + &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + i__2 = i__ + i__ * a_dim1; + a[i__2].r = alpha.r, a[i__2].i = alpha.i; + } +/* L10: */ } + return 0; -/* - Balance the matrix - (Workspace: need N) -*/ +/* End of CGEQR2 */ - ibal = 1; - dgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); +} /* cgeqr2_ */ -/* - Reduce to upper Hessenberg form - (Workspace: need 3*N, prefer 2*N+N*NB) -*/ +/* Subroutine */ int cgeqrf_(integer *m, integer *n, complex *a, integer *lda, + complex *tau, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - itau = ibal + *n; - iwrk = itau + *n; - i__1 = *lwork - iwrk + 1; - dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, - &ierr); + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int cgeqr2_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *), clarfb_(char *, char + *, char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; - if (wantvl) { /* - Want left 
eigenvectors - Copy Householder vectors to VL -*/ + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 - *(unsigned char *)side = 'L'; - dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) - ; -/* - Generate orthogonal matrix in VL - (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) -*/ + Purpose + ======= - i__1 = *lwork - iwrk + 1; - dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], - &i__1, &ierr); + CGEQRF computes a QR factorization of a complex M-by-N matrix A: + A = Q * R. -/* - Perform QR iteration, accumulating Schur vectors in VL - (Workspace: need N+1, prefer N+HSWORK (see comments) ) -*/ + Arguments + ========= - iwrk = itau; - i__1 = *lwork - iwrk + 1; - dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & - vl[vl_offset], ldvl, &work[iwrk], &i__1, info); + M (input) INTEGER + The number of rows of the matrix A. M >= 0. - if (wantvr) { + N (input) INTEGER + The number of columns of the matrix A. N >= 0. -/* - Want left and right eigenvectors - Copy Schur vectors to VR -*/ + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(M,N)-by-N upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the unitary matrix Q as a + product of min(m,n) elementary reflectors (see Further + Details). - *(unsigned char *)side = 'B'; - dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); - } + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). - } else if (wantvr) { + TAU (output) COMPLEX array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). 
-/* - Want right eigenvectors - Copy Householder vectors to VR -*/ + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - *(unsigned char *)side = 'R'; - dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) - ; + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is + the optimal blocksize. -/* - Generate orthogonal matrix in VR - (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) -*/ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. - i__1 = *lwork - iwrk + 1; - dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], - &i__1, &ierr); + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value -/* - Perform QR iteration, accumulating Schur vectors in VR - (Workspace: need N+1, prefer N+HSWORK (see comments) ) -*/ + Further Details + =============== - iwrk = itau; - i__1 = *lwork - iwrk + 1; - dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & - vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + The matrix Q is represented as a product of elementary reflectors - } else { + Q = H(1) H(2) . . . H(k), where k = min(m,n). -/* - Compute eigenvalues only - (Workspace: need N+1, prefer N+HSWORK (see comments) ) + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a complex scalar, and v is a complex vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). 
+ + ===================================================================== + + + Test the input arguments */ - iwrk = itau; - i__1 = *lwork - iwrk + 1; - dhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & - vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "CGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; } -/* If INFO > 0 from DHSEQR, then quit */ +/* Quick return if possible */ - if (*info > 0) { - goto L50; + k = min(*m,*n); + if (k == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; } - if (wantvl || wantvr) { + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { /* - Compute left and/or right eigenvectors - (Workspace: need 4*N) + Determine when to cross over from blocked to unblocked code. + + Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "CGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { - dtrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, - &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); - } +/* Determine if workspace is large enough for blocked code. */ - if (wantvl) { + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { /* - Undo balancing of left eigenvectors - (Workspace: need N) + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. 
*/ - dgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, - &ierr); - -/* Normalize left eigenvectors and make largest component real */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (wi[i__] == 0.) { - scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); - dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); - } else if (wi[i__] > 0.) { - d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); - d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); - scl = 1. / dlapy2_(&d__1, &d__2); - dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); - dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { -/* Computing 2nd power */ - d__1 = vl[k + i__ * vl_dim1]; -/* Computing 2nd power */ - d__2 = vl[k + (i__ + 1) * vl_dim1]; - work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; -/* L10: */ - } - k = idamax_(n, &work[iwrk], &c__1); - dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], - &cs, &sn, &r__); - drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * - vl_dim1 + 1], &c__1, &cs, &sn); - vl[k + (i__ + 1) * vl_dim1] = 0.; + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); } -/* L20: */ } } - if (wantvr) { + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); /* - Undo balancing of right eigenvectors - (Workspace: need N) + Compute the QR factorization of the current block + A(i:m,i:i+ib-1) */ - dgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, - &ierr); + i__3 = *m - i__ + 1; + cgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { -/* Normalize right eigenvectors and make largest component real */ +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (wi[i__] == 0.) { - scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); - dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); - } else if (wi[i__] > 0.) { - d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); - d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); - scl = 1. / dlapy2_(&d__1, &d__2); - dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); - dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); - i__2 = *n; - for (k = 1; k <= i__2; ++k) { -/* Computing 2nd power */ - d__1 = vr[k + i__ * vr_dim1]; -/* Computing 2nd power */ - d__2 = vr[k + (i__ + 1) * vr_dim1]; - work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; -/* L30: */ - } - k = idamax_(n, &work[iwrk], &c__1); - dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], - &cs, &sn, &r__); - drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * - vr_dim1 + 1], &c__1, &cs, &sn); - vr[k + (i__ + 1) * vr_dim1] = 0.; + i__3 = *m - i__ + 1; + clarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise" + , &i__3, &i__4, &ib, &a[i__ + i__ * a_dim1], lda, & + work[1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, + &work[ib + 1], &ldwork); } 
-/* L40: */ +/* L10: */ } + } else { + i__ = 1; } -/* Undo scaling if necessary */ +/* Use unblocked code to factor the last or only block. */ -L50: - if (scalea) { - i__1 = *n - *info; -/* Computing MAX */ - i__3 = *n - *info; - i__2 = max(i__3,1); - dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + - 1], &i__2, &ierr); - i__1 = *n - *info; -/* Computing MAX */ - i__3 = *n - *info; - i__2 = max(i__3,1); - dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + - 1], &i__2, &ierr); - if (*info > 0) { - i__1 = ilo - 1; - dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], - n, &ierr); - i__1 = ilo - 1; - dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], - n, &ierr); - } + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + cgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); } - work[1] = (doublereal) maxwrk; + work[1].r = (real) iws, work[1].i = 0.f; return 0; -/* End of DGEEV */ +/* End of CGEQRF */ -} /* dgeev_ */ +} /* cgeqrf_ */ -/* Subroutine */ int dgehd2_(integer *n, integer *ilo, integer *ihi, - doublereal *a, integer *lda, doublereal *tau, doublereal *work, +/* Subroutine */ int cgesdd_(char *jobz, integer *m, integer *n, complex *a, + integer *lda, real *s, complex *u, integer *ldu, complex *vt, integer + *ldvt, complex *work, integer *lwork, real *rwork, integer *iwork, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Builtin functions */ + double sqrt(doublereal); /* Local variables */ - static integer i__; - static doublereal aii; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + static integer i__, ie, il, ir, iu, 
blk; + static real dum[1], eps; + static integer iru, ivt, iscl; + static real anrm; + static integer idum[1], ierr, itau, irvt; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + extern logical lsame_(char *, char *); + static integer chunk, minmn, wrkbl, itaup, itauq; + static logical wntqa; + static integer nwork; + extern /* Subroutine */ int clacp2_(char *, integer *, integer *, real *, + integer *, complex *, integer *); + static logical wntqn, wntqo, wntqs; + static integer mnthr1, mnthr2; + extern /* Subroutine */ int cgebrd_(integer *, integer *, complex *, + integer *, real *, real *, complex *, complex *, complex *, + integer *, integer *); + extern doublereal clange_(char *, integer *, integer *, complex *, + integer *, real *); + extern /* Subroutine */ int cgelqf_(integer *, integer *, complex *, + integer *, complex *, complex *, integer *, integer *), clacrm_( + integer *, integer *, complex *, integer *, real *, integer *, + complex *, integer *, real *), clarcm_(integer *, integer *, real + *, integer *, complex *, integer *, complex *, integer *, real *), + clascl_(char *, integer *, integer *, real *, real *, integer *, + integer *, complex *, integer *, integer *), sbdsdc_(char + *, char *, integer *, real *, real *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *, integer *), cgeqrf_(integer *, integer *, complex *, integer + *, complex *, complex *, integer *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), claset_(char *, + integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cungbr_(char 
*, integer *, integer *, integer + *, complex *, integer *, complex *, complex *, integer *, integer + *); + static real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), cunmbr_(char *, char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *), cunglq_( + integer *, integer *, integer *, complex *, integer *, complex *, + complex *, integer *, integer *); + static integer ldwrkl; + extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *); + static integer ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; + static real smlnum; + static logical wntqas, lquery; + static integer nrwork; /* - -- LAPACK routine (version 3.0) -- + -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + October 31, 1999 Purpose ======= - DGEHD2 reduces a real general matrix A to upper Hessenberg form H by - an orthogonal similarity transformation: Q' * A * Q = H . + CGESDD computes the singular value decomposition (SVD) of a complex + M-by-N matrix A, optionally computing the left and/or right singular + vectors, by using divide-and-conquer method. The SVD is written + + A = U * SIGMA * conjugate-transpose(V) + + where SIGMA is an M-by-N matrix which is zero except for its + min(m,n) diagonal elements, U is an M-by-M unitary matrix, and + V is an N-by-N unitary matrix. The diagonal elements of SIGMA + are the singular values of A; they are real and non-negative, and + are returned in descending order. The first min(m,n) columns of + U and V are the left and right singular vectors of A. + + Note that the routine returns VT = V**H, not V. 
+ + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. Arguments ========= - N (input) INTEGER - The order of the matrix A. N >= 0. + JOBZ (input) CHARACTER*1 + Specifies options for computing all or part of the matrix U: + = 'A': all M columns of U and all N rows of V**H are + returned in the arrays U and VT; + = 'S': the first min(M,N) columns of U and the first + min(M,N) rows of V**H are returned in the arrays U + and VT; + = 'O': If M >= N, the first N columns of U are overwritten + on the array A and all rows of V**H are returned in + the array VT; + otherwise, all columns of U are returned in the + array U and the first M rows of V**H are overwritten + in the array VT; + = 'N': no columns of U or rows of V**H are computed. - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to DGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. - 1 <= ILO <= IHI <= max(1,N). + M (input) INTEGER + The number of rows of the input matrix A. M >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the n by n general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the orthogonal matrix Q as a product of elementary - reflectors. See Further Details. + N (input) INTEGER + The number of columns of the input matrix A. N >= 0. - LDA (input) INTEGER - The leading dimension of the array A. 
LDA >= max(1,N). + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, + if JOBZ = 'O', A is overwritten with the first N columns + of U (the left singular vectors, stored + columnwise) if M >= N; + A is overwritten with the first M rows + of V**H (the right singular vectors, stored + rowwise) otherwise. + if JOBZ .ne. 'O', the contents of A are destroyed. - TAU (output) DOUBLE PRECISION array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). - WORK (workspace) DOUBLE PRECISION array, dimension (N) + S (output) REAL array, dimension (min(M,N)) + The singular values of A, sorted so that S(i) >= S(i+1). - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. + U (output) COMPLEX array, dimension (LDU,UCOL) + UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; + UCOL = min(M,N) if JOBZ = 'S'. + If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M + unitary matrix U; + if JOBZ = 'S', U contains the first min(M,N) columns of U + (the left singular vectors, stored columnwise); + if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. - Further Details - =============== + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1; if + JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors + VT (output) COMPLEX array, dimension (LDVT,N) + If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the + N-by-N unitary matrix V**H; + if JOBZ = 'S', VT contains the first min(M,N) rows of + V**H (the right singular vectors, stored rowwise); + if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. - Q = H(ilo) H(ilo+1) . . . H(ihi-1). + LDVT (input) INTEGER + The leading dimension of the array VT. 
LDVT >= 1; if + JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; + if JOBZ = 'S', LDVT >= min(M,N). - Each H(i) has the form + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - H(i) = I - tau * v * v' + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N). + if JOBZ = 'O', + LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + if JOBZ = 'S' or 'A', + LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + For good performance, LWORK should generally be larger. + If LWORK < 0 but other input arguments are legal, WORK(1) + returns the optimal LWORK. - where tau is a real scalar, and v is a real vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). + RWORK (workspace) REAL array, dimension (LRWORK) + If JOBZ = 'N', LRWORK >= 7*min(M,N). + Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N) - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: + IWORK (workspace) INTEGER array, dimension (8*min(M,N)) - on entry, on exit, + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The updating process of SBDSDC did not converge. - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) + Further Details + =============== - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). 
+ Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA ===================================================================== - Test the input parameters + Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; - --tau; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; --work; + --rwork; + --iwork; /* Function Body */ *info = 0; - if (*n < 0) { + minmn = min(*m,*n); + mnthr1 = (integer) (minmn * 17.f / 9.f); + mnthr2 = (integer) (minmn * 5.f / 3.f); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + minwrk = 1; + maxwrk = 1; + lquery = *lwork == -1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { + } else if (*m < 0) { *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + } else if (*n < 0) { *info = -3; - } else if (*lda < max(1,*n)) { + } else if (*lda < max(1,*m)) { *info = -5; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEHD2", &i__1); - return 0; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; } - i__1 = *ihi - 1; - for (i__ = *ilo; i__ <= i__1; ++i__) { - -/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ - - i__2 = *ihi - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * - a_dim1], &c__1, &tau[i__]); - aii = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.; - -/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ - - i__2 = *ihi - i__; - dlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &a[(i__ + 1) * 
a_dim1 + 1], lda, &work[1]); - -/* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ - - i__2 = *ihi - i__; - i__3 = *n - i__; - dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); - - a[i__ + 1 + i__ * a_dim1] = aii; -/* L10: */ - } - - return 0; - -/* End of DGEHD2 */ - -} /* dgehd2_ */ - -/* Subroutine */ int dgehrd_(integer *n, integer *ilo, integer *ihi, - doublereal *a, integer *lda, doublereal *tau, doublereal *work, - integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - - /* Local variables */ - static integer i__; - static doublereal t[4160] /* was [65][64] */; - static integer ib; - static doublereal ei; - static integer nb, nh, nx, iws; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer nbmin, iinfo; - extern /* Subroutine */ int dgehd2_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), - dlarfb_(char *, char *, char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *), dlahrd_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + CWorkspace refers to complex workspace, and RWorkspace to + real workspace. 
NB refers to the optimal block size for the + immediately following subroutine, as returned by ILAENV.) +*/ + if (*info == 0 && *m > 0 && *n > 0) { + if (*m >= *n) { /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + There is no complex work space needed for bidiagonal SVD + The real work space needed for bidiagonal SVD is BDSPAC, + BDSPAC = 3*N*N + 4*N +*/ + if (*m >= mnthr1) { + if (wntqn) { - Purpose - ======= +/* Path 1 (M much larger than N, JOBZ='N') */ - DGEHRD reduces a real general matrix A to upper Hessenberg form H by - an orthogonal similarity transformation: Q' * A * Q = H . + wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); + maxwrk = wrkbl; + minwrk = *n * 3; + } else if (wntqo) { - Arguments - ========= +/* Path 2 (M much larger than N, JOBZ='O') */ - N (input) INTEGER - The order of the matrix A. N >= 0. 
+ wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *n + *n * *n + wrkbl; + minwrk = (*n << 1) * *n + *n * 3; + } else if (wntqs) { - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that A is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to DGEBAL; otherwise they should be - set to 1 and N respectively. See Further Details. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +/* Path 3 (M much larger than N, JOBZ='S') */ - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the N-by-N general matrix to be reduced. - On exit, the upper triangle and the first subdiagonal of A - are overwritten with the upper Hessenberg matrix H, and the - elements below the first subdiagonal, with the array TAU, - represent the orthogonal matrix Q as a product of elementary - reflectors. See Further Details. 
+ wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *n * *n + wrkbl; + minwrk = *n * *n + *n * 3; + } else if (wntqa) { - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). +/* Path 4 (M much larger than N, JOBZ='A') */ - TAU (output) DOUBLE PRECISION array, dimension (N-1) - The scalar factors of the elementary reflectors (see Further - Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to - zero. 
+ wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "CUNGQR", + " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *n * *n + wrkbl; + minwrk = *n * *n + (*n << 1) + *m; + } + } else if (*m >= mnthr2) { - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +/* Path 5 (M much larger than N, but not as much as MNTHR1) */ - LWORK (input) INTEGER - The length of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is the - optimal blocksize. 
+ maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*n << 1) + *m; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *n * *n; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } + } else { - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. +/* Path 6 (M at least N, but not much larger) */ - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. 
+ maxwrk = (*n << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*n << 1) + *m; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *n * *n; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "PRC", n, n, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } + } + } else { - Further Details - =============== +/* + There is no complex work space needed for bidiagonal SVD + The real work space needed for bidiagonal SVD is BDSPAC, + BDSPAC = 3*M*M + 4*M +*/ - The matrix Q is represented as a product of (ihi-ilo) elementary - reflectors + if (*n >= mnthr1) { + if (wntqn) { - Q = H(ilo) H(ilo+1) . . . H(ihi-1). 
+/* Path 1t (N much larger than M, JOBZ='N') */ - Each H(i) has the form + maxwrk = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + maxwrk = max(i__1,i__2); + minwrk = *m * 3; + } else if (wntqo) { - H(i) = I - tau * v * v' +/* Path 2t (N much larger than M, JOBZ='O') */ - where tau is a real scalar, and v is a real vector with - v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on - exit in A(i+2:ihi,i), and tau in TAU(i). + wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "CUNGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *n + *m * *m + wrkbl; + minwrk = (*m << 1) * *m + *m * 3; + } else if (wntqs) { - The contents of A are illustrated by the following example, with - n = 7, ilo = 2 and ihi = 6: +/* Path 3t (N much larger than M, JOBZ='S') */ - on entry, on exit, + wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "CUNGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "CGEBRD", " ", m, m, &c_n1, 
&c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *m + wrkbl; + minwrk = *m * *m + *m * 3; + } else if (wntqa) { - ( a a a a a a a ) ( a a h h h h a ) - ( a a a a a a ) ( a h h h h a ) - ( a a a a a a ) ( h h h h h h ) - ( a a a a a a ) ( v2 h h h h h ) - ( a a a a a a ) ( v2 v3 h h h h ) - ( a a a a a a ) ( v2 v3 v4 h h h ) - ( a ) ( a ) +/* Path 4t (N much larger than M, JOBZ='A') */ - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). + wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "CUNGLQ", + " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + (*m << 1) * ilaenv_(& + c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen) + 6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + wrkbl = max(i__1,i__2); + maxwrk = *m * *m + wrkbl; + minwrk = *m * *m + (*m << 1) + *n; + } + } else if (*n >= mnthr2) { - ===================================================================== +/* Path 5t (N much larger than M, but not as much as MNTHR1) */ + maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, 
(ftnlen)1); + minwrk = (*m << 1) + *n; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *m * *m; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + } + } else { - Test the input parameters -*/ +/* Path 6t (N greater than M, but not much larger) */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; + maxwrk = (*m << 1) + (*m + *n) * ilaenv_(&c__1, "CGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + minwrk = (*m << 1) + *n; + if (wntqo) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNMBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + maxwrk += *m * *n; + minwrk += *m * *m; + } else if (wntqs) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "PRC", m, n, m, &c_n1, (ftnlen)6, ( 
+ ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } else if (wntqa) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *n * ilaenv_(&c__1, + "CUNGBR", "PRC", n, n, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); + } + } + } + maxwrk = max(maxwrk,minwrk); + work[1].r = (real) maxwrk, work[1].i = 0.f; + } - /* Function Body */ - *info = 0; -/* Computing MIN */ - i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = min(i__1,i__2); - lwkopt = *n * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*n < 0) { - *info = -1; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -2; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -8; + if (*lwork < minwrk && ! lquery) { + *info = -13; } if (*info != 0) { i__1 = -(*info); - xerbla_("DGEHRD", &i__1); + xerbla_("CGESDD", &i__1); return 0; } else if (lquery) { return 0; } -/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - tau[i__] = 0.; -/* L10: */ - } - i__1 = *n - 1; - for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { - tau[i__] = 0.; -/* L20: */ - } - /* Quick return if possible */ - nh = *ihi - *ilo + 1; - if (nh <= 1) { - work[1] = 1.; + if (*m == 0 || *n == 0) { + if (*lwork >= 1) { + work[1].r = 1.f, work[1].i = 0.f; + } return 0; } -/* - Determine the block size. 
+/* Get machine constants */ - Computing MIN -*/ - i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nb = min(i__1,i__2); - nbmin = 2; - iws = 1; - if (nb > 1 && nb < nh) { + eps = slamch_("P"); + smlnum = sqrt(slamch_("S")) / eps; + bignum = 1.f / smlnum; -/* - Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ - Computing MAX + anrm = clange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0.f && anrm < smlnum) { + iscl = 1; + clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* + A has at least as many rows as columns. If A has sufficiently + more rows than columns, first reduce using the QR + decomposition (if sufficient workspace available) */ - i__1 = nb, i__2 = ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < nh) { -/* Determine if workspace is large enough for blocked code. */ + if (*m >= mnthr1) { - iws = *n * nb; - if (*lwork < iws) { + if (wntqn) { /* - Not enough workspace to use optimal NB: determine the - minimum value of NB, and reduce NB or force use of - unblocked code. 
- - Computing MAX + Path 1 (M much larger than N, JOBZ='N') + No singular vectors to be computed */ - i__1 = 2, i__2 = ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - if (*lwork >= *n * nbmin) { - nb = *lwork / *n; - } else { - nb = 1; - } - } - } - } - ldwork = *n; - - if (nb < nbmin || nb >= nh) { -/* Use unblocked code below */ + itau = 1; + nwork = itau + *n; - i__ = *ilo; +/* + Compute A=Q*R + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: need 0) +*/ - } else { + i__1 = *lwork - nwork + 1; + cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); -/* Use blocked code */ +/* Zero out below R */ - i__1 = *ihi - 1 - nx; - i__2 = nb; - for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = nb, i__4 = *ihi - i__; - ib = min(i__3,i__4); + i__1 = *n - 1; + i__2 = *n - 1; + claset_("L", &i__1, &i__2, &c_b55, &c_b55, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; /* - Reduce columns i:i+ib-1 to Hessenberg form, returning the - matrices V and T of the block reflector H = I - V*T*V' - which performs the reduction, and also the matrix Y = A*V*T + Bidiagonalize R in A + (CWorkspace: need 3*N, prefer 2*N+2*N*NB) + (RWorkspace: need N) */ - dlahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & - c__65, &work[1], &ldwork); + i__1 = *lwork - nwork + 1; + cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nrwork = ie + *n; /* - Apply the block reflector H to A(1:ihi,i+ib:ihi) from the - right, computing A := A - Y * V'. V(i+ib,ib-1) must be set - to 1. 
+ Perform bidiagonal SVD, compute singular values only + (CWorkspace: 0) + (RWorkspace: need BDSPAC) */ - ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; - a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.; - i__3 = *ihi - i__ - ib + 1; - dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b151, & - work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & - c_b15, &a[(i__ + ib) * a_dim1 + 1], lda); - a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; + sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + + } else if (wntqo) { /* - Apply the block reflector H to A(i+1:ihi,i+ib:n) from the - left + Path 2 (M much larger than N, JOBZ='O') + N left singular vectors to be overwritten on A and + N right singular vectors to be computed in VT */ - i__3 = *ihi - i__; - i__4 = *n - i__ - ib + 1; - dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & - i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ - i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); -/* L30: */ - } - } + iu = 1; -/* Use unblocked code to reduce the rest of the matrix */ +/* WORK(IU) is N by N */ - dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); - work[1] = (doublereal) iws; + ldwrku = *n; + ir = iu + ldwrku * *n; + if (*lwork >= *m * *n + *n * *n + *n * 3) { - return 0; +/* WORK(IR) is M by N */ -/* End of DGEHRD */ + ldwrkr = *m; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; -} /* dgehrd_ */ +/* + Compute A=Q*R + (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB) + (RWorkspace: 0) +*/ -/* Subroutine */ int dgelq2_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *tau, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; + i__1 = *lwork - nwork + 1; + cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); - /* Local variables */ - static integer i__, k; - 
static doublereal aii; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer *); +/* Copy R to WORK( IR ), zeroing out below it */ + clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + claset_("L", &i__1, &i__2, &c_b55, &c_b55, &work[ir + 1], & + ldwrkr); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ + i__1 = *lwork - nwork + 1; + cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; - Purpose - ======= +/* + Bidiagonalize R in WORK(IR) + (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB) + (RWorkspace: need N) +*/ - DGELQ2 computes an LQ factorization of a real m by n matrix A: - A = L * Q. + i__1 = *lwork - nwork + 1; + cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); - Arguments - ========= +/* + Perform bidiagonal SVD, computing left singular vectors + of R in WORK(IRU) and computing right singular vectors + of R in WORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ - M (input) INTEGER - The number of rows of the matrix A. M >= 0. + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); - N (input) INTEGER - The number of columns of the matrix A. N >= 0. 
+/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by the left singular vectors of R + (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB) + (RWorkspace: 0) +*/ - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, the elements on and below the diagonal of the array - contain the m by min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors (see Further Details). - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). - - WORK (workspace) DOUBLE PRECISION array, dimension (M) + clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & + ierr); - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by the right singular vectors of R + (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) + (RWorkspace: 0) +*/ - Further Details - =============== + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); - The matrix Q is represented as a product of elementary reflectors +/* + Multiply Q in A by left singular vectors of R in + WORK(IU), storing result in WORK(IR) and copying to A + (CWorkspace: need 2*N*N, prefer N*N+M*N) + (RWorkspace: 0) +*/ - Q = H(k) . . . H(2) H(1), where k = min(m,n). + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + cgemm_("N", "N", &chunk, n, n, &c_b56, &a[i__ + a_dim1], + lda, &work[iu], &ldwrku, &c_b55, &work[ir], & + ldwrkr); + clacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } - Each H(i) has the form + } else if (wntqs) { - H(i) = I - tau * v * v' +/* + Path 3 (M much larger than N, JOBZ='S') + N left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), - and tau in TAU(i). + ir = 1; - ===================================================================== +/* WORK(IR) is N by N */ + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; - Test the input arguments +/* + Compute A=Q*R + (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) + (RWorkspace: 0) */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; + i__2 = *lwork - nwork + 1; + cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGELQ2", &i__1); - return 0; - } +/* Copy R to WORK(IR), zeroing out below it */ - k = min(*m,*n); + clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + claset_("L", &i__2, &i__1, &c_b55, &c_b55, &work[ir + 1], & + ldwrkr); - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { +/* + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ -/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + i__2 = *lwork - nwork + 1; + cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = 1; + itauq = 
itau; + itaup = itauq + *n; + nwork = itaup + *n; - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1] - , lda, &tau[i__]); - if (i__ < *m) { +/* + Bidiagonalize R in WORK(IR) + (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) + (RWorkspace: need N) +*/ -/* Apply H(i) to A(i+1:m,i:n) from the right */ + i__2 = *lwork - nwork + 1; + cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - i__2 = *m - i__; - i__3 = *n - i__ + 1; - dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ - i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = aii; - } -/* L10: */ - } - return 0; +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ -/* End of DGELQ2 */ + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); -} /* dgelq2_ */ +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ -/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int 
dgelq2_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, - char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - + Multiply Q in A by left singular vectors of R in + WORK(IR), storing result in U + (CWorkspace: need N*N) + (RWorkspace: 0) +*/ - Purpose - ======= + clacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + cgemm_("N", "N", m, n, n, &c_b56, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b55, &u[u_offset], ldu); - DGELQF computes an LQ factorization of a real M-by-N matrix A: - A = L * Q. + } else if (wntqa) { - Arguments - ========= +/* + Path 4 (M much larger than N, JOBZ='A') + M left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ - M (input) INTEGER - The number of rows of the matrix A. M >= 0. + iu = 1; - N (input) INTEGER - The number of columns of the matrix A. N >= 0. 
+/* WORK(IU) is N by N */ - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, the elements on and below the diagonal of the array - contain the m-by-min(m,n) lower trapezoidal matrix L (L is - lower triangular if m <= n); the elements above the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors (see Further Details). + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). +/* + Compute A=Q*R, copying result to U + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) +*/ - TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). + i__2 = *lwork - nwork + 1; + cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +/* + Generate Q in U + (CWorkspace: need N+M, prefer N+M*NB) + (RWorkspace: 0) +*/ - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,M). - For optimum performance LWORK >= M*NB, where NB is the - optimal blocksize. + i__2 = *lwork - nwork + 1; + cungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. 
+/* Produce R in A, zeroing out below it */ - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value + i__2 = *n - 1; + i__1 = *n - 1; + claset_("L", &i__2, &i__1, &c_b55, &c_b55, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = itau; + itaup = itauq + *n; + nwork = itaup + *n; - Further Details - =============== +/* + Bidiagonalize R in A + (CWorkspace: need 3*N, prefer 2*N+2*N*NB) + (RWorkspace: need N) +*/ - The matrix Q is represented as a product of elementary reflectors + i__2 = *lwork - nwork + 1; + cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + iru = ie + *n; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; - Q = H(k) . . . H(2) H(1), where k = min(m,n). +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ - Each H(i) has the form + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); - H(i) = I - tau * v * v' +/* + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by left singular vectors of R + (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) + (RWorkspace: 0) +*/ - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), - and tau in TAU(i). 
+ clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); - ===================================================================== +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of R + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); - Test the input arguments +/* + Multiply Q in U by left singular vectors of R in + WORK(IU), storing result in A + (CWorkspace: need N*N) + (RWorkspace: 0) */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; + cgemm_("N", "N", m, n, n, &c_b56, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b55, &a[a_offset], lda); - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *m * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*m) && ! lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGELQF", &i__1); - return 0; - } else if (lquery) { - return 0; - } +/* Copy left singular vectors of A from A to U */ -/* Quick return if possible */ + clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); - k = min(*m,*n); - if (k == 0) { - work[1] = 1.; - return 0; - } + } - nbmin = 2; - nx = 0; - iws = *m; - if (nb > 1 && nb < k) { + } else if (*m >= mnthr2) { /* - Determine when to cross over from blocked to unblocked code. 
+ MNTHR2 <= M < MNTHR1 - Computing MAX + Path 5 (M much larger than N, but not as much as MNTHR1) + Reduce to bidiagonal form without QR decomposition, use + CUNGBR and matrix multiplication to compute singular vectors */ - i__1 = 0, i__2 = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { - -/* Determine if workspace is large enough for blocked code. */ - ldwork = *m; - iws = ldwork * nb; - if (*lwork < iws) { + ie = 1; + nrwork = ie + *n; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; /* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. + Bidiagonalize A + (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) + (RWorkspace: need N) */ - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } - - if (nb >= nbmin && nb < k && nx < k) { + i__2 = *lwork - nwork + 1; + cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { -/* Use blocked code initially */ +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAC) +*/ - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); + sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; /* - Compute the LQ factorization of the current block - A(i:i+ib-1,i:n) + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) */ - i__3 = *n - i__ + 1; - dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *m) { + clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); /* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . H(i+ib-1) + Generate Q in A + (CWorkspace: need 2*N, prefer N+N*NB) + (RWorkspace: 0) */ - i__3 = *n - i__ + 1; - dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); + i__2 = *lwork - nwork + 1; + cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[ + nwork], &i__2, &ierr); -/* Apply H to A(i+ib:m,i:n) from the right */ + if (*lwork >= *m * *n + *n * 3) { - i__3 = *m - i__ - ib + 1; - i__4 = *n - i__ + 1; - dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, - &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + - 1], &ldwork); - } -/* L10: */ - } - } else { - i__ = 1; - } +/* WORK( IU ) is M by N */ -/* Use unblocked code to factor the last or only block. 
*/ + ldwrku = *m; + } else { - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); - } +/* WORK(IU) is LDWRKU by N */ - work[1] = (doublereal) iws; - return 0; + ldwrku = (*lwork - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; -/* End of DGELQF */ +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ -} /* dgelqf_ */ + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); -/* Subroutine */ int dgelsd_(integer *m, integer *n, integer *nrhs, - doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * - s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, - integer *iwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in WORK(IU), copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ - /* Builtin functions */ - double log(doublereal); + clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &work[iu] + , &ldwrku, &rwork[nrwork]); + clacpy_("F", n, n, &work[iu], &ldwrku, &vt[vt_offset], ldvt); - /* Local variables */ - static integer ie, il, mm; - static doublereal eps, anrm, bnrm; - static integer itau, nlvl, iascl, ibscl; - static doublereal sfmin; - static integer minmn, maxmn, itaup, itauq, mnthr, nwork; - extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebrd_( - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *); - extern doublereal dlamch_(char *), dlange_(char *, integer *, - integer *, doublereal *, integer *, doublereal *); - 
extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, integer *), - dlalsd_(char *, integer *, integer *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, integer *), dlascl_(char *, - integer *, integer *, doublereal *, doublereal *, integer *, - integer *, doublereal *, integer *, integer *), dgeqrf_( - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, - doublereal *, doublereal *, integer *), xerbla_(char *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static doublereal bignum; - extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, integer *); - static integer wlalsd; - extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *); - static integer ldwork; - extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *); - static integer minwrk, maxwrk; - static doublereal smlnum; - static logical lquery; - static integer smlsiz; +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need N*N, prefer M*N) + (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +*/ + + nrwork = irvt; + i__2 = *m; + i__1 = ldwrku; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrku); + clacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], n, + &work[iu], &ldwrku, &rwork[nrwork]); + clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L20: */ + } + } else if (wntqs) { /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ + clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__1, &ierr); - Purpose - ======= +/* + Copy A to U, generate Q + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ - DGELSD computes the minimum-norm solution to a real linear least - squares problem: - minimize 2-norm(| b - A*x |) - using the singular value decomposition (SVD) of A. A is an M-by-N - matrix which may be rank-deficient. + clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cungbr_("Q", m, n, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); - Several right hand side vectors b and solution vectors x can be - handled in a single call; they are stored as the columns of the - M-by-NRHS right hand side matrix B and the N-by-NRHS solution - matrix X. 
+/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ - The problem is solved in three steps: - (1) Reduce the coefficient matrix A to bidiagonal form with - Householder transformations, reducing the original problem - into a "bidiagonal least squares problem" (BLS) - (2) Solve the BLS using a divide and conquer approach. - (3) Apply back all the Householder tranformations to solve - the original least squares problem. + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); - The effective rank of A is determined by treating as zero those - singular values which are less than RCOND times the largest singular - value. +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. + clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + clacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - Arguments - ========= +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need N*N+2*M*N) +*/ - M (input) INTEGER - The number of rows of A. M >= 0. 
+ nrwork = irvt; + clacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], + lda, &rwork[nrwork]); + clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + } else { - N (input) INTEGER - The number of columns of A. N >= 0. +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrices B and X. NRHS >= 0. + clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__1, &ierr); - A (input) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, A has been destroyed. +/* + Copy A to U, generate Q + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: 0) +*/ - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). + clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On entry, the M-by-NRHS right hand side matrix B. - On exit, B is overwritten by the N-by-NRHS solution - matrix X. If m >= n and RANK = n, the residual - sum-of-squares for the solution in the i-th column is given - by the sum of squares of elements n+1:m in that column. +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,max(M,N)). 
+ iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); - S (output) DOUBLE PRECISION array, dimension (min(M,N)) - The singular values of A in decreasing order. - The condition number of A in the 2-norm = S(1)/S(min(m,n)). +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need 3*N*N) +*/ - RCOND (input) DOUBLE PRECISION - RCOND is used to determine the effective rank of A. - Singular values S(i) <= RCOND*S(1) are treated as zero. - If RCOND < 0, machine precision is used instead. + clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + clacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - RANK (output) INTEGER - The effective rank of A, i.e., the number of singular values - which are greater than RCOND*S(1). +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: 0) + (Rworkspace: need 3*N*N) +*/ - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + nrwork = irvt; + clacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset], + lda, &rwork[nrwork]); + clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + } - LWORK (input) INTEGER - The dimension of the array WORK. LWORK must be at least 1. - The exact minimum amount of workspace needed depends on M, - N and NRHS. As long as LWORK is at least - 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, - if M is greater than or equal to N or - 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, - if M is less than N, the code will execute correctly. 
- SMLSIZ is returned by ILAENV and is equal to the maximum - size of the subproblems at the bottom of the computation - tree (usually about 25), and - NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) - For good performance, LWORK should generally be larger. + } else { - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. +/* + M .LT. MNTHR2 - IWORK (workspace) INTEGER array, dimension (LIWORK) - LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, - where MINMN = MIN( M,N ). + Path 6 (M at least N, but not much larger) + Reduce to bidiagonal form without QR decomposition + Use CUNMBR to compute singular vectors +*/ - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: the algorithm for computing the SVD failed to converge; - if INFO = i, i off-diagonal elements of an intermediate - bidiagonal form did not converge to zero. 
+ ie = 1; + nrwork = ie + *n; + itauq = 1; + itaup = itauq + *n; + nwork = itaup + *n; - Further Details - =============== +/* + Bidiagonalize A + (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) + (RWorkspace: need N) +*/ - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA + i__1 = *lwork - nwork + 1; + cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, &ierr); + if (wntqn) { - ===================================================================== +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAC) +*/ + sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + if (*lwork >= *m * *n + *n * 3) { - Test the input arguments. -*/ +/* WORK( IU ) is M by N */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - --s; - --work; - --iwork; + ldwrku = *m; + } else { - /* Function Body */ - *info = 0; - minmn = min(*m,*n); - maxmn = max(*m,*n); - mnthr = ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( - ftnlen)1); - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*ldb < max(1,maxmn)) { - *info = -7; - } +/* WORK( IU ) is LDWRKU by N */ - smlsiz = ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); + ldwrku = (*lwork - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; /* - Compute workspace. 
- (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - NB refers to the optimal block size for the immediately - following subroutine, as returned by ILAENV.) + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) */ - minwrk = 1; - minmn = max(1,minmn); -/* Computing MAX */ - i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) / - log(2.)) + 1; - nlvl = max(i__1,0); + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); - if (*info == 0) { - maxwrk = 0; - mm = *m; - if (*m >= *n && *m >= mnthr) { +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: need 0) +*/ -/* Path 1a - overdetermined, with many more rows than columns. */ + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); - mm = *n; -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "DORMQR", "LT", - m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); - maxwrk = max(i__1,i__2); - } - if (*m >= *n) { + if (*lwork >= *m * *n + *n * 3) { /* - Path 1 - overdetermined or exactly determined. 
- - Computing MAX + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by left singular vectors of A, copying + to A + (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB) + (Rworkspace: need 0) */ - i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "DGEBRD" - , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "DORMBR", - "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "DORMBR", - "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing 2nd power */ - i__1 = smlsiz + 1; - wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * * - nrhs + i__1 * i__1; -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n * 3 + wlalsd; - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2), - i__2 = *n * 3 + wlalsd; - minwrk = max(i__1,i__2); - } - if (*n > *m) { -/* Computing 2nd power */ - i__1 = smlsiz + 1; - wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * * - nrhs + i__1 * i__1; - if (*n >= mnthr) { + + claset_("F", m, n, &c_b55, &c_b55, &work[iu], &ldwrku); + clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__1, & + ierr); + clacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { /* - Path 2a - underdetermined, with many more columns - than rows. 
+ Generate Q in A + (Cworkspace: need 2*N, prefer N+N*NB) + (Rworkspace: need 0) */ - maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, - &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * - ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& - c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * - ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1, ( - ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); - if (*nrhs > 1) { -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; - maxwrk = max(i__1,i__2); - } else { -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 1); - maxwrk = max(i__1,i__2); - } -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "DORMLQ", - "LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; - maxwrk = max(i__1,i__2); - } else { - -/* Path 2 - remaining underdetermined cases. 
*/ + i__1 = *lwork - nwork + 1; + cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__1, &ierr); - maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "DGEBRD", " ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "DORMBR" - , "QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR", - "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * 3 + wlalsd; - maxwrk = max(i__1,i__2); - } -/* Computing MAX */ - i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2), - i__2 = *m * 3 + wlalsd; - minwrk = max(i__1,i__2); - } - minwrk = min(minwrk,maxwrk); - work[1] = (doublereal) maxwrk; - if (*lwork < minwrk && ! lquery) { - *info = -12; - } - } +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need N*N, prefer M*N) + (Rworkspace: need 3*N*N, prefer N*N+2*M*N) +*/ - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGELSD", &i__1); - return 0; - } else if (lquery) { - goto L10; - } + nrwork = irvt; + i__1 = *m; + i__2 = ldwrku; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrku); + clacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], + n, &work[iu], &ldwrku, &rwork[nrwork]); + clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ + + a_dim1], lda); +/* L30: */ + } + } -/* Quick return if possible. */ + } else if (wntqs) { - if (*m == 0 || *n == 0) { - *rank = 0; - return 0; - } +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ -/* Get machine parameters. 
*/ + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); - eps = PRECISION; - sfmin = SAFEMINIMUM; - smlnum = sfmin / eps; - bignum = 1. / smlnum; - dlabad_(&smlnum, &bignum); +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ -/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ + claset_("F", m, n, &c_b55, &c_b55, &u[u_offset], ldu); + clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]); - iascl = 0; - if (anrm > 0. && anrm < smlnum) { +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ -/* Scale matrix norm up to SMLNUM. */ + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + } else { - dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, - info); - iascl = 1; - } else if (anrm > bignum) { +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ -/* Scale matrix norm down to BIGNUM. 
*/ + iru = nrwork; + irvt = iru + *n * *n; + nrwork = irvt + *n * *n; + sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, & + rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1], + info); - dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, - info); - iascl = 2; - } else if (anrm == 0.) { +/* Set the right corner of U to identity matrix */ -/* Matrix all zero. Return zero solution. */ + claset_("F", m, m, &c_b55, &c_b55, &u[u_offset], ldu); + i__2 = *m - *n; + i__1 = *m - *n; + claset_("F", &i__2, &i__1, &c_b55, &c_b56, &u[*n + 1 + (*n + + 1) * u_dim1], ldu); - i__1 = max(*m,*n); - dlaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); - dlaset_("F", &minmn, &c__1, &c_b29, &c_b29, &s[1], &c__1); - *rank = 0; - goto L10; - } +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 2*N+M, prefer 2*N+M*NB) + (RWorkspace: 0) +*/ -/* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ + clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); - ibscl = 0; - if (bnrm > 0. && bnrm < smlnum) { +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*N, prefer 2*N+N*NB) + (RWorkspace: 0) +*/ -/* Scale matrix norm up to SMLNUM. */ + clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + } - dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, - info); - ibscl = 1; - } else if (bnrm > bignum) { + } -/* Scale matrix norm down to BIGNUM. 
*/ + } else { - dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, - info); - ibscl = 2; - } +/* + A has more columns than rows. If A has sufficiently more + columns than rows, first reduce using the LQ decomposition + (if sufficient workspace available) +*/ -/* If M < N make sure certain entries of B are zero. */ + if (*n >= mnthr1) { - if (*m < *n) { - i__1 = *n - *m; - dlaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[*m + 1 + b_dim1], ldb); - } + if (wntqn) { -/* Overdetermined case. */ +/* + Path 1t (N much larger than M, JOBZ='N') + No singular vectors to be computed +*/ - if (*m >= *n) { + itau = 1; + nwork = itau + *m; -/* Path 1 - overdetermined or exactly determined. */ +/* + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ - mm = *m; - if (*m >= mnthr) { + i__2 = *lwork - nwork + 1; + cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); -/* Path 1a - overdetermined, with many more rows than columns. */ +/* Zero out above L */ - mm = *n; - itau = 1; - nwork = itau + *n; + i__2 = *m - 1; + i__1 = *m - 1; + claset_("U", &i__2, &i__1, &c_b55, &c_b55, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; /* - Compute A=Q*R. - (Workspace: need 2*N, prefer N+N*NB) + Bidiagonalize L in A + (CWorkspace: need 3*M, prefer 2*M+2*M*NB) + (RWorkspace: need M) */ - i__1 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, - info); + i__2 = *lwork - nwork + 1; + cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + nrwork = ie + *m; /* - Multiply B by transpose(Q). 
- (Workspace: need N+NRHS, prefer N+NRHS*NB) + Perform bidiagonal SVD, compute singular values only + (CWorkspace: 0) + (RWorkspace: need BDSPAC) */ - i__1 = *lwork - nwork + 1; - dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ - b_offset], ldb, &work[nwork], &i__1, info); - -/* Zero out below R. */ - - if (*n > 1) { - i__1 = *n - 1; - i__2 = *n - 1; - dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &a[a_dim1 + 2], - lda); - } - } + sbdsdc_("U", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); - ie = 1; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; + } else if (wntqo) { /* - Bidiagonalize R in A. - (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) + Path 2t (N much larger than M, JOBZ='O') + M right singular vectors to be overwritten on A and + M left singular vectors to be computed in U */ - i__1 = *lwork - nwork + 1; - dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__1, info); - -/* - Multiply B by transpose of left bidiagonalizing vectors of R. - (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) -*/ + ivt = 1; + ldwkvt = *m; - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], - &b[b_offset], ldb, &work[nwork], &i__1, info); +/* WORK(IVT) is M by M */ -/* Solve the bidiagonal least squares problem. */ + il = ivt + ldwkvt * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3) { - dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, - rcond, rank, &work[nwork], &iwork[1], info); - if (*info != 0) { - goto L10; - } +/* WORK(IL) M by N */ -/* Multiply B by right bidiagonalizing vectors of R. 
*/ + ldwrkl = *m; + chunk = *n; + } else { - i__1 = *lwork - nwork + 1; - dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & - b[b_offset], ldb, &work[nwork], &i__1, info); +/* WORK(IL) is M by CHUNK */ - } else /* if(complicated condition) */ { -/* Computing MAX */ - i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( - i__1,*nrhs), i__2 = *n - *m * 3; - if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) { + ldwrkl = *m; + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + itau = il + ldwrkl * chunk; + nwork = itau + *m; /* - Path 2a - underdetermined, with many more columns than rows - and sufficient workspace for an efficient algorithm. + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) */ - ldwork = *m; + i__2 = *lwork - nwork + 1; + cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + claset_("U", &i__2, &i__1, &c_b55, &c_b55, &work[il + ldwrkl], + &ldwrkl); + /* - Computing MAX - Computing MAX + Generate Q in A + (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) + (RWorkspace: 0) */ - i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = - max(i__3,*nrhs), i__4 = *n - *m * 3; - i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + - *m + *m * *nrhs; - if (*lwork >= max(i__1,i__2)) { - ldwork = *lda; - } - itau = 1; - nwork = *m + 1; + + i__2 = *lwork - nwork + 1; + cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; /* - Compute A=L*Q. 
- (Workspace: need 2*M, prefer M+M*NB) + Bidiagonalize L in WORK(IL) + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) */ - i__1 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, - info); - il = nwork; + i__2 = *lwork - nwork + 1; + cgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); -/* Copy L to WORK(IL), zeroing out above its diagonal. */ +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ - dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); - i__1 = *m - 1; - i__2 = *m - 1; - dlaset_("U", &i__1, &i__2, &c_b29, &c_b29, &work[il + ldwork], & - ldwork); - ie = il + ldwork * *m; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); /* - Bidiagonalize L in WORK(IL). - (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) + Copy real matrix RWORK(IRU) to complex matrix WORK(IU) + Overwrite WORK(IU) by the left singular vectors of L + (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) + (RWorkspace: 0) */ - i__1 = *lwork - nwork + 1; - dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], - &work[itaup], &work[nwork], &i__1, info); + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); /* - Multiply B by transpose of left bidiagonalizing vectors of L. 
- (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) + Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) + Overwrite WORK(IVT) by the right singular vectors of L + (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB) + (RWorkspace: 0) */ - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ - itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); + clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); -/* Solve the bidiagonal least squares problem. */ +/* + Multiply right singular vectors of L in WORK(IL) by Q + in A, storing result in WORK(IL) and copying to A + (CWorkspace: need 2*M*M, prefer M*M+M*N)) + (RWorkspace: 0) +*/ - dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], - ldb, rcond, rank, &work[nwork], &iwork[1], info); - if (*info != 0) { - goto L10; - } + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + cgemm_("N", "N", m, &blk, m, &c_b56, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b55, &work[il], & + ldwrkl); + clacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } -/* Multiply B by right bidiagonalizing vectors of L. */ + } else if (wntqs) { - i__1 = *lwork - nwork + 1; - dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ - itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); +/* + Path 3t (N much larger than M, JOBZ='S') + M right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ -/* Zero out below first M rows of B. 
*/ + il = 1; - i__1 = *n - *m; - dlaset_("F", &i__1, nrhs, &c_b29, &c_b29, &b[*m + 1 + b_dim1], - ldb); - nwork = itau + *m; +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; /* - Multiply transpose(Q) by B. - (Workspace: need M+NRHS, prefer M+NRHS*NB) + Compute A=L*Q + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) */ - i__1 = *lwork - nwork + 1; - dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ - b_offset], ldb, &work[nwork], &i__1, info); + i__1 = *lwork - nwork + 1; + cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); - } else { +/* Copy L to WORK(IL), zeroing out above it */ -/* Path 2 - remaining underdetermined cases. */ + clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + claset_("U", &i__1, &i__2, &c_b55, &c_b55, &work[il + ldwrkl], + &ldwrkl); - ie = 1; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; +/* + Generate Q in A + (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) + (RWorkspace: 0) +*/ + + i__1 = *lwork - nwork + 1; + cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; /* - Bidiagonalize A. - (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) + Bidiagonalize L in WORK(IL) + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) */ - i__1 = *lwork - nwork + 1; - dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__1, info); + i__1 = *lwork - nwork + 1; + cgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); /* - Multiply B by transpose of left bidiagonalizing vectors. 
- (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) */ - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] - , &b[b_offset], ldb, &work[nwork], &i__1, info); + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); -/* Solve the bidiagonal least squares problem. */ +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ - dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], - ldb, rcond, rank, &work[nwork], &iwork[1], info); - if (*info != 0) { - goto L10; - } + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); -/* Multiply B by right bidiagonalizing vectors of A. */ +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by left singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ - i__1 = *lwork - nwork + 1; - dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] - , &b[b_offset], ldb, &work[nwork], &i__1, info); + clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); - } - } +/* + Copy VT to WORK(IL), multiply right singular vectors of L + in WORK(IL) by Q in A, storing result in VT + (CWorkspace: need M*M) + (RWorkspace: 0) +*/ -/* Undo scaling. 
*/ + clacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + cgemm_("N", "N", m, n, m, &c_b56, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b55, &vt[vt_offset], ldvt); - if (iascl == 1) { - dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, - info); - dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & - minmn, info); - } else if (iascl == 2) { - dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, - info); - dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & - minmn, info); - } - if (ibscl == 1) { - dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, - info); - } else if (ibscl == 2) { - dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, - info); - } + } else if (wntqa) { -L10: - work[1] = (doublereal) maxwrk; - return 0; +/* + Path 9t (N much larger than M, JOBZ='A') + N right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ -/* End of DGELSD */ + ivt = 1; -} /* dgelsd_ */ +/* WORK(IVT) is M by M */ -/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *tau, doublereal *work, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; - /* Local variables */ - static integer i__, k; - static doublereal aii; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer *); +/* + Compute A=L*Q, copying result to VT + (CWorkspace: need 2*M, prefer M+M*NB) + (RWorkspace: 0) +*/ + i__1 = *lwork - nwork + 1; + cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); /* - -- LAPACK routine (version 3.0) -- - 
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + Generate Q in VT + (CWorkspace: need M+N, prefer M+N*NB) + (RWorkspace: 0) +*/ + i__1 = *lwork - nwork + 1; + cunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__1, &ierr); - Purpose - ======= +/* Produce L in A, zeroing out above it */ - DGEQR2 computes a QR factorization of a real m by n matrix A: - A = Q * R. + i__1 = *m - 1; + i__2 = *m - 1; + claset_("U", &i__1, &i__2, &c_b55, &c_b55, &a[(a_dim1 << 1) + + 1], lda); + ie = 1; + itauq = itau; + itaup = itauq + *m; + nwork = itaup + *m; - Arguments - ========= +/* + Bidiagonalize L in A + (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) + (RWorkspace: need M) +*/ - M (input) INTEGER - The number of rows of the matrix A. M >= 0. + i__1 = *lwork - nwork + 1; + cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); - N (input) INTEGER - The number of columns of the matrix A. N >= 0. +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n matrix A. - On exit, the elements on and above the diagonal of the array - contain the min(m,n) by n upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of elementary reflectors (see Further Details). + iru = ie + *m; + irvt = iru + *m * *m; + nrwork = irvt + *m * *m; + sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). 
+/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of L + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: 0) +*/ - TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - WORK (workspace) DOUBLE PRECISION array, dimension (N) +/* + Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) + Overwrite WORK(IVT) by right singular vectors of L + (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) + (RWorkspace: 0) +*/ - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value + clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__1, & + ierr); - Further Details - =============== +/* + Multiply right singular vectors of L in WORK(IVT) by + Q in VT, storing result in A + (CWorkspace: need M*M) + (RWorkspace: 0) +*/ - The matrix Q is represented as a product of elementary reflectors + cgemm_("N", "N", m, n, m, &c_b56, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b55, &a[a_offset], lda); - Q = H(1) H(2) . . . H(k), where k = min(m,n). +/* Copy right singular vectors of A from A to VT */ - Each H(i) has the form + clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); - H(i) = I - tau * v * v' + } - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). 
+ } else if (*n >= mnthr2) { - ===================================================================== +/* + MNTHR2 <= N < MNTHR1 + + Path 5t (N much larger than M, but not as much as MNTHR1) + Reduce to bidiagonal form without QR decomposition, use + CUNGBR and matrix multiplication to compute singular vectors +*/ - Test the input arguments + ie = 1; + nrwork = ie + *m; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) + (RWorkspace: M) */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; + i__1 = *lwork - nwork + 1; + cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, &ierr); - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEQR2", &i__1); - return 0; - } + if (wntqn) { - k = min(*m,*n); +/* + Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAC) +*/ - i__1 = k; - for (i__ = 1; i__ <= i__1; ++i__) { + sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + ivt = nwork; -/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] - , &c__1, &tau[i__]); - if (i__ < *n) { + clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__1, &ierr); -/* Apply H(i) to A(i:m,i+1:n) from the left */ +/* + Generate P**H in A + (Cworkspace: need 
2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ - aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - i__2 = *m - i__ + 1; - i__3 = *n - i__; - dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); - a[i__ + i__ * a_dim1] = aii; - } -/* L10: */ - } - return 0; + i__1 = *lwork - nwork + 1; + cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[ + nwork], &i__1, &ierr); -/* End of DGEQR2 */ + ldwkvt = *m; + if (*lwork >= *m * *n + *m * 3) { -} /* dgeqr2_ */ +/* WORK( IVT ) is M by N */ -/* Subroutine */ int dgeqrf_(integer *m, integer *n, doublereal *a, integer * - lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + nwork = ivt + ldwkvt * *n; + chunk = *n; + } else { - /* Local variables */ - static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int dgeqr2_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, - char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer ldwork, lwkopt; - static logical lquery; +/* WORK( IVT ) is M by CHUNK */ + chunk = (*lwork - *m * 3) / *m; + nwork = ivt + ldwkvt * chunk; + } /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); - Purpose - ======= +/* + Multiply Q in U by real matrix RWORK(IRVT) + storing the result in WORK(IVT), copying to U + (Cworkspace: need 0) + (Rworkspace: need 2*M*M) +*/ - DGEQRF computes a QR factorization of a real M-by-N matrix A: - A = Q * R. + clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &work[ivt], & + ldwkvt, &rwork[nrwork]); + clacpy_("F", m, m, &work[ivt], &ldwkvt, &u[u_offset], ldu); - Arguments - ========= +/* + Multiply RWORK(IRVT) by P**H in A, storing the + result in WORK(IVT), copying to A + (CWorkspace: need M*M, prefer M*N) + (Rworkspace: need 2*M*M, prefer 2*M*N) +*/ - M (input) INTEGER - The number of rows of the matrix A. M >= 0. + nrwork = iru; + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + clarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1], + lda, &work[ivt], &ldwkvt, &rwork[nrwork]); + clacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * + a_dim1 + 1], lda); +/* L50: */ + } + } else if (wntqs) { - N (input) INTEGER - The number of columns of the matrix A. N >= 0. +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix A. 
- On exit, the elements on and above the diagonal of the array - contain the min(M,N)-by-N upper trapezoidal matrix R (R is - upper triangular if m >= n); the elements below the diagonal, - with the array TAU, represent the orthogonal matrix Q as a - product of min(m,n) elementary reflectors (see Further - Details). + clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__2, &ierr); - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ - TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) - The scalar factors of the elementary reflectors (see Further - Details). + clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cungbr_("P", m, n, m, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - For optimum performance LWORK >= N*NB, where NB is - the optimal blocksize. + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. 
+/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need 3*M*M) +*/ - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value + clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], + lda, &rwork[nrwork]); + clacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); - Further Details - =============== +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need M*M+2*M*N) +*/ - The matrix Q is represented as a product of elementary reflectors + nrwork = iru; + clarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + } else { - Q = H(1) H(2) . . . H(k), where k = min(m,n). +/* + Copy A to U, generate Q + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ - Each H(i) has the form + clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[ + nwork], &i__2, &ierr); - H(i) = I - tau * v * v' +/* + Copy A to VT, generate P**H + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: 0) +*/ - where tau is a real scalar, and v is a real vector with - v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), - and tau in TAU(i). 
+ clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + i__2 = *lwork - nwork + 1; + cungbr_("P", n, n, m, &vt[vt_offset], ldvt, &work[itaup], & + work[nwork], &i__2, &ierr); - ===================================================================== +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); - Test the input arguments +/* + Multiply Q in U by real matrix RWORK(IRU), storing the + result in A, copying to U + (CWorkspace: need 0) + (Rworkspace: need 3*M*M) */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --tau; - --work; - - /* Function Body */ - *info = 0; - nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - lwkopt = *n * nb; - work[1] = (doublereal) lwkopt; - lquery = *lwork == -1; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } else if (*lwork < max(1,*n) && ! 
lquery) { - *info = -7; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGEQRF", &i__1); - return 0; - } else if (lquery) { - return 0; - } + clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset], + lda, &rwork[nrwork]); + clacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu); -/* Quick return if possible */ +/* + Multiply real matrix RWORK(IRVT) by P**H in VT, + storing the result in A, copying to VT + (Cworkspace: need 0) + (Rworkspace: need M*M+2*M*N) +*/ - k = min(*m,*n); - if (k == 0) { - work[1] = 1.; - return 0; - } + clarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[ + a_offset], lda, &rwork[nrwork]); + clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + } - nbmin = 2; - nx = 0; - iws = *n; - if (nb > 1 && nb < k) { + } else { /* - Determine when to cross over from blocked to unblocked code. + N .LT. MNTHR2 - Computing MAX + Path 6t (N greater than M, but not much larger) + Reduce to bidiagonal form without LQ decomposition + Use CUNMBR to compute singular vectors */ - i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - nx = max(i__1,i__2); - if (nx < k) { -/* Determine if workspace is large enough for blocked code. */ + ie = 1; + nrwork = ie + *m; + itauq = 1; + itaup = itauq + *m; + nwork = itaup + *m; - ldwork = *n; - iws = ldwork * nb; - if (*lwork < iws) { +/* + Bidiagonalize A + (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) + (RWorkspace: M) +*/ + + i__2 = *lwork - nwork + 1; + cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], + &work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { /* - Not enough workspace to use optimal NB: reduce NB and - determine the minimum value of NB. 
+ Compute singular values only + (Cworkspace: 0) + (Rworkspace: need BDSPAC) */ - nb = *lwork / ldwork; -/* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & - c_n1, (ftnlen)6, (ftnlen)1); - nbmin = max(i__1,i__2); - } - } - } + sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & + c__1, dum, idum, &rwork[nrwork], &iwork[1], info); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3) { - if (nb >= nbmin && nb < k && nx < k) { +/* WORK( IVT ) is M by N */ -/* Use blocked code initially */ + claset_("F", m, n, &c_b55, &c_b55, &work[ivt], &ldwkvt); + nwork = ivt + ldwkvt * *n; + } else { - i__1 = k - nx; - i__2 = nb; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { -/* Computing MIN */ - i__3 = k - i__ + 1; - ib = min(i__3,nb); +/* WORK( IVT ) is M by CHUNK */ + + chunk = (*lwork - *m * 3) / *m; + nwork = ivt + ldwkvt * chunk; + } /* - Compute the QR factorization of the current block - A(i:m,i:i+ib-1) + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) */ - i__3 = *m - i__ + 1; - dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ - 1], &iinfo); - if (i__ + ib <= *n) { + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); /* - Form the triangular factor of the block reflector - H = H(i) H(i+1) . . . 
H(i+ib-1) + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: need 0) */ - i__3 = *m - i__ + 1; - dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * - a_dim1], lda, &tau[i__], &work[1], &ldwork); + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__2 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); -/* Apply H' to A(i:m,i+ib:n) from the left */ + if (*lwork >= *m * *n + *m * 3) { - i__3 = *m - i__ + 1; - i__4 = *n - i__ - ib + 1; - dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & - i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & - ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib - + 1], &ldwork); +/* + Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) + Overwrite WORK(IVT) by right singular vectors of A, + copying to A + (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB) + (Rworkspace: need 0) +*/ + + clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt); + i__2 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + clacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* + Generate P**H in A + (Cworkspace: need 2*M, prefer M+M*NB) + (Rworkspace: need 0) +*/ + + i__2 = *lwork - nwork + 1; + cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by real matrix RWORK(IRU), storing the + result in WORK(IU), copying to A + (CWorkspace: need M*M, prefer M*N) + (Rworkspace: need 3*M*M, prefer M*M+2*M*N) +*/ + + nrwork = iru; + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + clarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1] + , lda, &work[ivt], &ldwkvt, &rwork[nrwork]); + clacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ * + a_dim1 + 1], lda); +/* L60: */ + } + } + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + claset_("F", m, n, &c_b55, &c_b55, &vt[vt_offset], ldvt); + clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in RWORK(IRU) and computing right + singular vectors of bidiagonal matrix in RWORK(IRVT) + (CWorkspace: need 0) + (RWorkspace: need BDSPAC) +*/ + + irvt = nrwork; + iru = irvt + *m * *m; + nrwork = iru + *m * *m; + + sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, & + rwork[irvt], m, dum, idum, &rwork[nrwork], 
&iwork[1], + info); + +/* + Copy real matrix RWORK(IRU) to complex matrix U + Overwrite U by left singular vectors of A + (CWorkspace: need 3*M, prefer 2*M+M*NB) + (RWorkspace: M*M) +*/ + + clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu); + i__1 = *lwork - nwork + 1; + cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + +/* Set the right corner of VT to identity matrix */ + + i__1 = *n - *m; + i__2 = *n - *m; + claset_("F", &i__1, &i__2, &c_b55, &c_b56, &vt[*m + 1 + (*m + + 1) * vt_dim1], ldvt); + +/* + Copy real matrix RWORK(IRVT) to complex matrix VT + Overwrite VT by right singular vectors of A + (CWorkspace: need 2*M+N, prefer 2*M+N*NB) + (RWorkspace: M*M) +*/ + + claset_("F", n, n, &c_b55, &c_b55, &vt[vt_offset], ldvt); + clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); + i__1 = *lwork - nwork + 1; + cunmbr_("P", "R", "C", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); } -/* L10: */ + } - } else { - i__ = 1; + } -/* Use unblocked code to factor the last or only block. 
*/ +/* Undo scaling if necessary */ - if (i__ <= k) { - i__2 = *m - i__ + 1; - i__1 = *n - i__ + 1; - dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] - , &iinfo); + if (iscl == 1) { + if (anrm > bignum) { + slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (anrm < smlnum) { + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } } - work[1] = (doublereal) iws; +/* Return optimal workspace in WORK(1) */ + + work[1].r = (real) maxwrk, work[1].i = 0.f; + return 0; -/* End of DGEQRF */ +/* End of CGESDD */ -} /* dgeqrf_ */ +} /* cgesdd_ */ -/* Subroutine */ int dgesdd_(char *jobz, integer *m, integer *n, doublereal * - a, integer *lda, doublereal *s, doublereal *u, integer *ldu, - doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, - integer *iwork, integer *info) +/* Subroutine */ int cgesv_(integer *n, integer *nrhs, complex *a, integer * + lda, integer *ipiv, complex *b, integer *ldb, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, - i__2, i__3; - - /* Builtin functions */ - double sqrt(doublereal); + integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ - static integer i__, ie, il, ir, iu, blk; - static doublereal dum[1], eps; - static integer ivt, iscl; - static doublereal anrm; - static integer idum[1], ierr, itau; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - extern logical lsame_(char *, char *); - static integer chunk, minmn, wrkbl, itaup, itauq, mnthr; - static logical wntqa; - static integer nwork; - static logical wntqn, wntqo, wntqs; - extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal - *, doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, 
doublereal *, integer *, integer *), dgebrd_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *); - extern doublereal dlamch_(char *), dlange_(char *, integer *, - integer *, doublereal *, integer *, doublereal *); - static integer bdspac; - extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, integer *), - dlascl_(char *, integer *, integer *, doublereal *, doublereal *, - integer *, integer *, doublereal *, integer *, integer *), - dgeqrf_(integer *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *, integer *), dlacpy_(char *, - integer *, integer *, doublereal *, integer *, doublereal *, - integer *), dlaset_(char *, integer *, integer *, - doublereal *, doublereal *, doublereal *, integer *), - xerbla_(char *, integer *), dorgbr_(char *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static doublereal bignum; - extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, integer *), dorglq_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *), dorgqr_(integer *, integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, integer *); - static integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; - static doublereal smlnum; - static logical wntqas, lquery; + extern /* Subroutine */ int cgetrf_(integer *, integer *, complex *, + integer *, integer *, integer *), xerbla_(char *, integer *), cgetrs_(char *, integer *, integer *, complex *, integer + *, integer *, complex *, integer *, 
integer *); /* -- LAPACK driver routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + March 31, 1993 Purpose ======= - DGESDD computes the singular value decomposition (SVD) of a real - M-by-N matrix A, optionally computing the left and right singular - vectors. If singular vectors are desired, it uses a - divide-and-conquer algorithm. + CGESV computes the solution to a complex system of linear equations + A * X = B, + where A is an N-by-N matrix and X and B are N-by-NRHS matrices. - The SVD is written + The LU decomposition with partial pivoting and row interchanges is + used to factor A as + A = P * L * U, + where P is a permutation matrix, L is unit lower triangular, and U is + upper triangular. The factored form of A is then used to solve the + system of equations A * X = B. - A = U * SIGMA * transpose(V) + Arguments + ========= - where SIGMA is an M-by-N matrix which is zero except for its - min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and - V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA - are the singular values of A; they are real and non-negative, and - are returned in descending order. The first min(m,n) columns of - U and V are the left and right singular vectors of A. + N (input) INTEGER + The number of linear equations, i.e., the order of the + matrix A. N >= 0. - Note that the routine returns VT = V**T, not V. + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. - The divide and conquer algorithm makes very mild assumptions about - floating point arithmetic. It will work on machines with a guard - digit in add/subtract, or on those binary machines without guard - digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or - Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. 
+ A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the N-by-N coefficient matrix A. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. - Arguments - ========= + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - JOBZ (input) CHARACTER*1 - Specifies options for computing all or part of the matrix U: - = 'A': all M columns of U and all N rows of V**T are - returned in the arrays U and VT; - = 'S': the first min(M,N) columns of U and the first - min(M,N) rows of V**T are returned in the arrays U - and VT; - = 'O': If M >= N, the first N columns of U are overwritten - on the array A and all rows of V**T are returned in - the array VT; - otherwise, all columns of U are returned in the - array U and the first M rows of V**T are overwritten - in the array VT; - = 'N': no columns of U or rows of V**T are computed. + IPIV (output) INTEGER array, dimension (N) + The pivot indices that define the permutation matrix P; + row i of the matrix was interchanged with row IPIV(i). - M (input) INTEGER - The number of rows of the input matrix A. M >= 0. + B (input/output) COMPLEX array, dimension (LDB,NRHS) + On entry, the N-by-NRHS matrix of right hand side matrix B. + On exit, if INFO = 0, the N-by-NRHS solution matrix X. - N (input) INTEGER - The number of columns of the input matrix A. N >= 0. + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix A. - On exit, - if JOBZ = 'O', A is overwritten with the first N columns - of U (the left singular vectors, stored - columnwise) if M >= N; - A is overwritten with the first M rows - of V**T (the right singular vectors, stored - rowwise) otherwise. - if JOBZ .ne. 'O', the contents of A are destroyed. 
+ INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, so the solution could not be computed. - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). + ===================================================================== - S (output) DOUBLE PRECISION array, dimension (min(M,N)) - The singular values of A, sorted so that S(i) >= S(i+1). - U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) - UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; - UCOL = min(M,N) if JOBZ = 'S'. - If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M - orthogonal matrix U; - if JOBZ = 'S', U contains the first min(M,N) columns of U - (the left singular vectors, stored columnwise); - if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. + Test the input parameters. +*/ - LDU (input) INTEGER - The leading dimension of the array U. LDU >= 1; if - JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; - VT (output) DOUBLE PRECISION array, dimension (LDVT,N) - If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the - N-by-N orthogonal matrix V**T; - if JOBZ = 'S', VT contains the first min(M,N) rows of - V**T (the right singular vectors, stored rowwise); - if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGESV ", &i__1); + return 0; + } - LDVT (input) INTEGER - The leading dimension of the array VT. 
LDVT >= 1; if - JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; - if JOBZ = 'S', LDVT >= min(M,N). +/* Compute the LU factorization of A. */ - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK; + cgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= 1. - If JOBZ = 'N', - LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)). - If JOBZ = 'O', - LWORK >= 3*min(M,N)*min(M,N) + - max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). - If JOBZ = 'S' or 'A' - LWORK >= 3*min(M,N)*min(M,N) + - max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). - For good performance, LWORK should generally be larger. - If LWORK < 0 but other input arguments are legal, WORK(1) - returns the optimal LWORK. +/* Solve the system A*X = B, overwriting B with X. */ - IWORK (workspace) INTEGER array, dimension (8*min(M,N)) + cgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: DBDSDC did not converge, updating process failed. 
+/* End of CGESV */ - Further Details - =============== +} /* cgesv_ */ - Based on contributions by - Ming Gu and Huan Ren, Computer Science Division, University of - California at Berkeley, USA +/* Subroutine */ int cgetf2_(integer *m, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; + + /* Builtin functions */ + void c_div(complex *, complex *, complex *); + + /* Local variables */ + static integer j, jp; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), cgeru_(integer *, integer *, complex *, complex *, + integer *, complex *, integer *, complex *, integer *), cswap_( + integer *, complex *, integer *, complex *, integer *); + extern integer icamax_(integer *, complex *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CGETF2 computes an LU factorization of a general m-by-n matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 2 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). 
+ + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, U(k,k) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. ===================================================================== - Test the input arguments + Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; - --s; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - vt_dim1 = *ldvt; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - --work; - --iwork; + --ipiv; /* Function Body */ *info = 0; - minmn = min(*m,*n); - mnthr = (integer) (minmn * 11. / 6.); - wntqa = lsame_(jobz, "A"); - wntqs = lsame_(jobz, "S"); - wntqas = wntqa || wntqs; - wntqo = lsame_(jobz, "O"); - wntqn = lsame_(jobz, "N"); - minwrk = 1; - maxwrk = 1; - lquery = *lwork == -1; - - if (! (wntqa || wntqs || wntqo || wntqn)) { + if (*m < 0) { *info = -1; - } else if (*m < 0) { - *info = -2; } else if (*n < 0) { - *info = -3; + *info = -2; } else if (*lda < max(1,*m)) { - *info = -5; - } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * - m) { - *info = -8; - } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || - wntqo && *m >= *n && *ldvt < *n) { - *info = -10; + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGETF2", &i__1); + return 0; } -/* - Compute workspace - (Note: Comments in the code beginning "Workspace:" describe the - minimal amount of workspace needed at that point in the code, - as well as the preferred amount for good performance. - NB refers to the optimal block size for the immediately - following subroutine, as returned by ILAENV.) 
-*/ +/* Quick return if possible */ - if (*info == 0 && *m > 0 && *n > 0) { - if (*m >= *n) { + if (*m == 0 || *n == 0) { + return 0; + } -/* Compute space needed for DBDSDC */ + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { - if (wntqn) { - bdspac = *n * 7; - } else { - bdspac = *n * 3 * *n + (*n << 2); - } - if (*m >= mnthr) { - if (wntqn) { +/* Find pivot and test for singularity. */ -/* Path 1 (M much larger than N, JOBZ='N') */ + i__2 = *m - j + 1; + jp = j - 1 + icamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + i__2 = jp + j * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { - wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, - "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n; - maxwrk = max(i__1,i__2); - minwrk = bdspac + *n; - } else if (wntqo) { +/* Apply the interchange to columns 1:N. 
*/ -/* Path 2 (M much larger than N, JOBZ='O') */ + if (jp != j) { + cswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } - wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", - " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, - "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + (*n << 1) * *n; - minwrk = bdspac + (*n << 1) * *n + *n * 3; - } else if (wntqs) { +/* Compute elements J+1:M of J-th column. 
*/ -/* Path 3 (M much larger than N, JOBZ='S') */ + if (j < *m) { + i__2 = *m - j; + c_div(&q__1, &c_b56, &a[j + j * a_dim1]); + cscal_(&i__2, &q__1, &a[j + 1 + j * a_dim1], &c__1); + } - wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", - " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, - "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *n * *n; - minwrk = bdspac + *n * *n + *n * 3; - } else if (wntqa) { + } else if (*info == 0) { -/* Path 4 (M much larger than N, JOBZ='A') */ + *info = j; + } - wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "DORGQR", - " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, - "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = 
max(i__1,i__2); - maxwrk = wrkbl + *n * *n; - minwrk = bdspac + *n * *n + *n * 3; - } - } else { + if (j < min(*m,*n)) { -/* Path 5 (M at least N, but not much larger) */ +/* Update trailing submatrix. */ - wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", " ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - if (wntqn) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - maxwrk = max(i__1,i__2); - minwrk = *n * 3 + max(*m,bdspac); - } else if (wntqo) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *n; -/* Computing MAX */ - i__1 = *m, i__2 = *n * *n + bdspac; - minwrk = *n * 3 + max(i__1,i__2); - } else if (wntqs) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *n * 3; - maxwrk = max(i__1,i__2); - minwrk = *n * 3 + max(*m,bdspac); - } else if (wntqa) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = bdspac + *n * 3; - maxwrk = max(i__1,i__2); - minwrk = *n * 3 + max(*m,bdspac); - } - } - } else { + i__2 = *m - j; + i__3 = *n - j; + q__1.r = -1.f, q__1.i 
= -0.f; + cgeru_(&i__2, &i__3, &q__1, &a[j + 1 + j * a_dim1], &c__1, &a[j + + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda) + ; + } +/* L10: */ + } + return 0; -/* Compute space needed for DBDSDC */ +/* End of CGETF2 */ - if (wntqn) { - bdspac = *m * 7; - } else { - bdspac = *m * 3 * *m + (*m << 2); - } - if (*n >= mnthr) { - if (wntqn) { +} /* cgetf2_ */ -/* Path 1t (N much larger than M, JOBZ='N') */ +/* Subroutine */ int cgetrf_(integer *m, integer *n, complex *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1; - wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, - "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m; - maxwrk = max(i__1,i__2); - minwrk = bdspac + *m; - } else if (wntqo) { + /* Local variables */ + static integer i__, j, jb, nb; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + static integer iinfo; + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), cgetf2_(integer *, + integer *, complex *, integer *, integer *, integer *), xerbla_( + char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int claswp_(integer *, complex *, integer *, + integer *, integer *, integer *, integer *); -/* Path 2t (N much larger than M, JOBZ='O') */ - wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, 
"DORGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, - "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + (*m << 1) * *m; - minwrk = bdspac + (*m << 1) * *m + *m * 3; - } else if (wntqs) { +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 -/* Path 3t (N much larger than M, JOBZ='S') */ - wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ", - " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, - "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *m; - minwrk = bdspac + *m * *m + *m * 3; - } else if (wntqa) { + Purpose + ======= -/* Path 4t (N much larger than M, JOBZ='A') */ + CGETRF computes an LU 
factorization of a general M-by-N matrix A + using partial pivoting with row interchanges. - wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "DORGLQ", - " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, - "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( - ftnlen)1); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *m; - minwrk = bdspac + *m * *m + *m * 3; - } - } else { + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). -/* Path 5t (N greater than M, but not much larger) */ + This is the right-looking Level 3 BLAS version of the algorithm. 
- wrkbl = *m * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", " ", m, - n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - if (wntqn) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - maxwrk = max(i__1,i__2); - minwrk = *m * 3 + max(*n,bdspac); - } else if (wntqo) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - wrkbl = max(i__1,i__2); - maxwrk = wrkbl + *m * *n; -/* Computing MAX */ - i__1 = *n, i__2 = *m * *m + bdspac; - minwrk = *m * 3 + max(i__1,i__2); - } else if (wntqs) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - maxwrk = max(i__1,i__2); - minwrk = *m * 3 + max(*n,bdspac); - } else if (wntqa) { -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" - , "PRT", n, n, m, &c_n1, (ftnlen)6, (ftnlen)3); - wrkbl = max(i__1,i__2); -/* Computing MAX */ - i__1 = wrkbl, i__2 = bdspac + *m * 3; - maxwrk = max(i__1,i__2); - minwrk = *m * 3 + max(*n,bdspac); - } - } - } - work[1] = (doublereal) maxwrk; - } + Arguments + ========= - if (*lwork < minwrk && ! lquery) { - *info = -12; + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. 
+ + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the M-by-N matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; } if (*info != 0) { i__1 = -(*info); - xerbla_("DGESDD", &i__1); - return 0; - } else if (lquery) { + xerbla_("CGETRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { - if (*lwork >= 1) { - work[1] = 1.; - } return 0; } -/* Get machine constants */ +/* Determine the block size for this environment. */ - eps = PRECISION; - smlnum = sqrt(SAFEMINIMUM) / eps; - bignum = 1. / smlnum; + nb = ilaenv_(&c__1, "CGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= min(*m,*n)) { -/* Scale A if max element outside range [SMLNUM,BIGNUM] */ +/* Use unblocked code. */ - anrm = dlange_("M", m, n, &a[a_offset], lda, dum); - iscl = 0; - if (anrm > 0. 
&& anrm < smlnum) { - iscl = 1; - dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & - ierr); - } else if (anrm > bignum) { - iscl = 1; - dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & - ierr); - } + cgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { - if (*m >= *n) { +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); /* - A has at least as many rows as columns. If A has sufficiently - more rows than columns, first reduce using the QR - decomposition (if sufficient workspace available) + Factor diagonal and subdiagonal blocks and test for exact + singularity. */ - if (*m >= mnthr) { + i__3 = *m - j + 1; + cgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); - if (wntqn) { +/* Adjust INFO and the pivot indices. */ -/* - Path 1 (M much larger than N, JOBZ='N') - No singular vectors to be computed -*/ + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } - itau = 1; - nwork = itau + *n; +/* Apply interchanges to columns 1:J-1. */ -/* - Compute A=Q*R - (Workspace: need 2*N, prefer N+N*NB) -*/ + i__3 = j - 1; + i__4 = j + jb - 1; + claswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); - i__1 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); + if (j + jb <= *n) { -/* Zero out below R */ +/* Apply interchanges to columns J+JB:N. 
*/ - i__1 = *n - 1; - i__2 = *n - 1; - dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &a[a_dim1 + 2], - lda); - ie = 1; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + claswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); -/* - Bidiagonalize R in A - (Workspace: need 4*N, prefer 3*N+2*N*NB) -*/ +/* Compute block row of U. */ - i__1 = *lwork - nwork + 1; - dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - nwork = ie + *n; + i__3 = *n - j - jb + 1; + ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b56, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { -/* - Perform bidiagonal SVD, computing singular values only - (Workspace: need N+BDSPAC) -*/ +/* Update trailing submatrix. */ - dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &q__1, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b56, &a[j + jb + (j + jb) * + a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; - } else if (wntqo) { +/* End of CGETRF */ -/* - Path 2 (M much larger than N, JOBZ = 'O') - N left singular vectors to be overwritten on A and - N right singular vectors to be computed in VT -*/ +} /* cgetrf_ */ - ir = 1; +/* Subroutine */ int cgetrs_(char *trans, integer *n, integer *nrhs, complex * + a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; -/* WORK(IR) is LDWRKR by N */ + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + 
integer *), xerbla_(char *, + integer *), claswp_(integer *, complex *, integer *, + integer *, integer *, integer *, integer *); + static logical notran; - if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { - ldwrkr = *lda; - } else { - ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; - } - itau = ir + ldwrkr * *n; - nwork = itau + *n; /* - Compute A=Q*R - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 - i__1 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); -/* Copy R to WORK(IR), zeroing out below it */ + Purpose + ======= - dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__1 = *n - 1; - i__2 = *n - 1; - dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &work[ir + 1], & - ldwrkr); + CGETRS solves a system of linear equations + A * X = B, A**T * X = B, or A**H * X = B + with a general N-by-N matrix A using the LU factorization computed + by CGETRF. -/* - Generate Q in A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ + Arguments + ========= - i__1 = *lwork - nwork + 1; - dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = itau; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; + TRANS (input) CHARACTER*1 + Specifies the form of the system of equations: + = 'N': A * X = B (No transpose) + = 'T': A**T * X = B (Transpose) + = 'C': A**H * X = B (Conjugate transpose) -/* - Bidiagonalize R in VT, copying result to WORK(IR) - (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) -*/ + N (input) INTEGER + The order of the matrix A. N >= 0. 
- i__1 = *lwork - nwork + 1; - dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. -/* WORK(IU) is N by N */ + A (input) COMPLEX array, dimension (LDA,N) + The factors L and U from the factorization A = P*L*U + as computed by CGETRF. - iu = nwork; - nwork = iu + *n * *n; + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in WORK(IU) and computing right - singular vectors of bidiagonal matrix in VT - (Workspace: need N+N*N+BDSPAC) -*/ + IPIV (input) INTEGER array, dimension (N) + The pivot indices from CGETRF; for 1<=i<=N, row i of the + matrix was interchanged with row IPIV(i). - dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); + B (input/output) COMPLEX array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. -/* - Overwrite WORK(IU) by left singular vectors of R - and VT by right singular vectors of R - (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) -*/ + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). 
- i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &work[iu], n, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value -/* - Multiply Q in A by left singular vectors of R in - WORK(IU), storing result in WORK(IR) and copying to A - (Workspace: need 2*N*N, prefer N*N+M*N) + ===================================================================== + + + Test the input parameters. */ - i__1 = *m; - i__2 = ldwrkr; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrkr); - dgemm_("N", "N", &chunk, n, n, &c_b15, &a[i__ + a_dim1], - lda, &work[iu], n, &c_b29, &work[ir], &ldwrkr); - dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + - a_dim1], lda); -/* L10: */ - } + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; - } else if (wntqs) { + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CGETRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { /* - Path 3 (M much larger than N, JOBZ='S') - N left singular vectors to be computed in U and - N right singular vectors to be computed in VT + Solve A * X = B. + + Apply row interchanges to the right hand sides. 
*/ - ir = 1; + claswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); -/* WORK(IR) is N by N */ +/* Solve L*X = B, overwriting B with X. */ - ldwrkr = *n; - itau = ir + ldwrkr * *n; - nwork = itau + *n; + ctrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b56, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b56, & + a[a_offset], lda, &b[b_offset], ldb); + } else { /* - Compute A=Q*R - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) + Solve A**T * X = B or A**H * X = B. + + Solve U'*X = B, overwriting B with X. */ - i__2 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); + ctrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b56, &a[ + a_offset], lda, &b[b_offset], ldb); -/* Copy R to WORK(IR), zeroing out below it */ +/* Solve L'*X = B, overwriting B with X. */ - dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); - i__2 = *n - 1; - i__1 = *n - 1; - dlaset_("L", &i__2, &i__1, &c_b29, &c_b29, &work[ir + 1], & - ldwrkr); + ctrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b56, &a[a_offset], + lda, &b[b_offset], ldb); -/* - Generate Q in A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ +/* Apply row interchanges to the solution vectors. 
*/ - i__2 = *lwork - nwork + 1; - dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = itau; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; + claswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } -/* - Bidiagonalize R in WORK(IR) - (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) -*/ + return 0; - i__2 = *lwork - nwork + 1; - dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); +/* End of CGETRS */ -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagoal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need N+BDSPAC) -*/ +} /* cgetrs_ */ - dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); +/* Subroutine */ int cheevd_(char *jobz, char *uplo, integer *n, complex *a, + integer *lda, real *w, complex *work, integer *lwork, real *rwork, + integer *lrwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + real r__1, r__2; -/* - Overwrite U by left singular vectors of R and VT - by right singular vectors of R - (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) -*/ + /* Builtin functions */ + double sqrt(doublereal); - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + /* Local variables */ + static real eps; + static integer inde; + static real anrm; + static integer imax; + static real rmin, rmax; + static integer lopt; + static real sigma; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static integer lwmin, liopt; + static logical lower; + static integer llrwk, lropt; + static logical wantz; + static integer indwk2, llwrk2; + extern 
doublereal clanhe_(char *, char *, integer *, complex *, integer *, + real *); + static integer iscale; + extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *, + integer *, complex *, integer *, real *, integer *, integer *, + integer *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int chetrd_(char *, integer *, complex *, integer + *, real *, real *, complex *, complex *, integer *, integer *), clacpy_(char *, integer *, integer *, complex *, integer + *, complex *, integer *); + static real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real bignum; + static integer indtau, indrwk, indwrk, liwmin; + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + static integer lrwmin; + extern /* Subroutine */ int cunmtr_(char *, char *, char *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + static integer llwork; + static real smlnum; + static logical lquery; - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); /* - Multiply Q in A by left singular vectors of R in - WORK(IR), storing result in U - (Workspace: need N*N) -*/ + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 - dlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); - dgemm_("N", "N", m, n, n, &c_b15, &a[a_offset], lda, &work[ir] - , &ldwrkr, &c_b29, &u[u_offset], ldu); - } else if (wntqa) { + Purpose + ======= -/* - Path 4 (M much larger than N, JOBZ='A') - M left singular vectors to be computed in U and - N right singular vectors to be computed in VT -*/ + CHEEVD computes all eigenvalues and, optionally, eigenvectors of a + complex Hermitian matrix A. If eigenvectors are desired, it uses a + divide and conquer algorithm. - iu = 1; + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. -/* WORK(IU) is N by N */ + Arguments + ========= - ldwrku = *n; - itau = iu + ldwrku * *n; - nwork = itau + *n; + JOBZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. -/* - Compute A=Q*R, copying result to U - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. - i__2 = *lwork - nwork + 1; - dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + N (input) INTEGER + The order of the matrix A. N >= 0. -/* - Generate Q in U - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ - i__2 = *lwork - nwork + 1; - dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], - &i__2, &ierr); + A (input/output) COMPLEX array, dimension (LDA, N) + On entry, the Hermitian matrix A. 
If UPLO = 'U', the + leading N-by-N upper triangular part of A contains the + upper triangular part of the matrix A. If UPLO = 'L', + the leading N-by-N lower triangular part of A contains + the lower triangular part of the matrix A. + On exit, if JOBZ = 'V', then if INFO = 0, A contains the + orthonormal eigenvectors of the matrix A. + If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') + or the upper triangle (if UPLO='U') of A, including the + diagonal, is destroyed. -/* Produce R in A, zeroing out other entries */ + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - i__2 = *n - 1; - i__1 = *n - 1; - dlaset_("L", &i__2, &i__1, &c_b29, &c_b29, &a[a_dim1 + 2], - lda); - ie = itau; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; + W (output) REAL array, dimension (N) + If INFO = 0, the eigenvalues in ascending order. -/* - Bidiagonalize R in A - (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) -*/ + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - i__2 = *lwork - nwork + 1; - dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); + LWORK (input) INTEGER + The length of the array WORK. + If N <= 1, LWORK must be at least 1. + If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. + If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in WORK(IU) and computing right - singular vectors of bidiagonal matrix in VT - (Workspace: need N+N*N+BDSPAC) -*/ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. 
- dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); + RWORK (workspace/output) REAL array, + dimension (LRWORK) + On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. -/* - Overwrite WORK(IU) by left singular vectors of R and VT - by right singular vectors of R - (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) -*/ + LRWORK (input) INTEGER + The dimension of the array RWORK. + If N <= 1, LRWORK must be at least 1. + If JOBZ = 'N' and N > 1, LRWORK must be at least N. + If JOBZ = 'V' and N > 1, LRWORK must be at least + 1 + 5*N + 2*N**2. - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & - ierr); - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); + If LRWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the RWORK array, + returns this value as the first entry of the RWORK array, and + no error message related to LRWORK is issued by XERBLA. -/* - Multiply Q in U by left singular vectors of R in - WORK(IU), storing result in A - (Workspace: need N*N) -*/ + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. - dgemm_("N", "N", m, n, n, &c_b15, &u[u_offset], ldu, &work[iu] - , &ldwrku, &c_b29, &a[a_offset], lda); + LIWORK (input) INTEGER + The dimension of the array IWORK. + If N <= 1, LIWORK must be at least 1. + If JOBZ = 'N' and N > 1, LIWORK must be at least 1. + If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. 
-/* Copy left singular vectors of A from A to U */ + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. - dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the algorithm failed to converge; i + off-diagonal elements of an intermediate tridiagonal + form did not converge to zero. - } + Further Details + =============== - } else { + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA -/* - M .LT. MNTHR + ===================================================================== - Path 5 (M at least N, but not much larger) - Reduce to bidiagonal form without QR decomposition + + Test the input parameters. */ - ie = 1; - itauq = ie + *n; - itaup = itauq + *n; - nwork = itaup + *n; + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --rwork; + --iwork; -/* - Bidiagonalize A - (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) -*/ + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; - i__2 = *lwork - nwork + 1; - dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { + *info = 0; + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + } else { + if (wantz) { + lwmin = (*n << 1) + *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1; + lrwmin = *n; + liwmin = 1; + } + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + } + if (! 
(wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! lquery) { + *info = -12; + } -/* - Perform bidiagonal SVD, only computing singular values - (Workspace: need N+BDSPAC) -*/ + if (*info == 0) { + work[1].r = (real) lopt, work[1].i = 0.f; + rwork[1] = (real) lropt; + iwork[1] = liopt; + } - dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - } else if (wntqo) { - iu = nwork; - if (*lwork >= *m * *n + *n * 3 + bdspac) { + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHEEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } -/* WORK( IU ) is M by N */ +/* Quick return if possible */ - ldwrku = *m; - nwork = iu + ldwrku * *n; - dlaset_("F", m, n, &c_b29, &c_b29, &work[iu], &ldwrku); - } else { + if (*n == 0) { + return 0; + } -/* WORK( IU ) is N by N */ + if (*n == 1) { + i__1 = a_dim1 + 1; + w[1] = a[i__1].r; + if (wantz) { + i__1 = a_dim1 + 1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + } + return 0; + } - ldwrku = *n; - nwork = iu + ldwrku * *n; +/* Get machine constants. */ -/* WORK(IR) is LDWRKR by N */ + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); + smlnum = safmin / eps; + bignum = 1.f / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); - ir = nwork; - ldwrkr = (*lwork - *n * *n - *n * 3) / *n; - } - nwork = iu + ldwrku * *n; +/* Scale matrix to allowable range, if necessary. 
*/ -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in WORK(IU) and computing right - singular vectors of bidiagonal matrix in VT - (Workspace: need N+N*N+BDSPAC) -*/ + anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]); + iscale = 0; + if (anrm > 0.f && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + clascl_(uplo, &c__0, &c__0, &c_b871, &sigma, n, n, &a[a_offset], lda, + info); + } - dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & - vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ - 1], info); +/* Call CHETRD to reduce Hermitian matrix to tridiagonal form. */ + + inde = 1; + indtau = 1; + indwrk = indtau + *n; + indrwk = inde + *n; + indwk2 = indwrk + *n * *n; + llwork = *lwork - indwrk + 1; + llwrk2 = *lwork - indwk2 + 1; + llrwk = *lrwork - indrwk + 1; + chetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); +/* Computing MAX */ + i__1 = indwrk; + r__1 = (real) lopt, r__2 = (real) (*n) + work[i__1].r; + lopt = dmax(r__1,r__2); /* - Overwrite VT by right singular vectors of A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) + For eigenvalues only, call SSTERF. For eigenvectors, first call + CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the + tridiagonal matrix, then call CUNMTR to multiply it to the + Householder transformations represented as Householder vectors in + A. */ - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); - - if (*lwork >= *m * *n + *n * 3 + bdspac) { - + if (! 
wantz) { + ssterf_(n, &w[1], &rwork[inde], info); + } else { + cstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2], + &llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info); + cunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + clacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); /* - Overwrite WORK(IU) by left singular vectors of A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) + Computing MAX + Computing 2nd power */ + i__3 = *n; + i__4 = indwk2; + i__1 = lopt, i__2 = *n + i__3 * i__3 + (integer) work[i__4].r; + lopt = max(i__1,i__2); + } - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & - ierr); +/* If matrix was scaled, then rescale eigenvalues appropriately. */ -/* Copy left singular vectors of A from WORK(IU) to A */ + if (iscale == 1) { + if (*info == 0) { + imax = *n; + } else { + imax = *info - 1; + } + r__1 = 1.f / sigma; + sscal_(&imax, &r__1, &w[1], &c__1); + } - dlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); - } else { + work[1].r = (real) lopt, work[1].i = 0.f; + rwork[1] = (real) lropt; + iwork[1] = liopt; -/* - Generate Q in A - (Workspace: need N*N+2*N, prefer N*N+N+N*NB) -*/ + return 0; - i__2 = *lwork - nwork + 1; - dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & - work[nwork], &i__2, &ierr); +/* End of CHEEVD */ -/* - Multiply Q in A by left singular vectors of - bidiagonal matrix in WORK(IU), storing result in - WORK(IR) and copying to A - (Workspace: need 2*N*N, prefer N*N+M*N) -*/ +} /* cheevd_ */ - i__2 = *m; - i__1 = ldwrkr; - for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *m - i__ + 1; - chunk = min(i__3,ldwrkr); - dgemm_("N", "N", &chunk, n, n, &c_b15, &a[i__ + - a_dim1], lda, &work[iu], &ldwrku, &c_b29, & - work[ir], &ldwrkr); - dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + - a_dim1], lda); -/* L20: */ - } - } - - } else if (wntqs) { +/* Subroutine */ int chetd2_(char *uplo, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tau, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + complex q__1, q__2, q__3, q__4; -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need N+BDSPAC) -*/ + /* Local variables */ + static integer i__; + static complex taui; + extern /* Subroutine */ int cher2_(char *, integer *, complex *, complex * + , integer *, complex *, integer *, complex *, integer *); + static complex alpha; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int chemv_(char *, integer *, complex *, complex * + , integer *, complex *, integer *, complex *, complex *, integer * + ), caxpy_(integer *, complex *, complex *, integer *, + complex *, integer *); + static logical upper; + extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, + integer *, complex *), xerbla_(char *, integer *); - dlaset_("F", m, n, &c_b29, &c_b29, &u[u_offset], ldu); - dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); /* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need 3*N, prefer 2*N+N*NB) -*/ + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } else if (wntqa) { -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need N+BDSPAC) -*/ + Purpose + ======= - dlaset_("F", m, m, &c_b29, &c_b29, &u[u_offset], ldu); - dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); + CHETD2 reduces a complex Hermitian matrix A to real symmetric + tridiagonal form T by a unitary similarity transformation: + Q' * A * Q = T. -/* Set the right corner of U to identity matrix */ + Arguments + ========= - i__1 = *m - *n; - i__2 = *m - *n; - dlaset_("F", &i__1, &i__2, &c_b29, &c_b15, &u[*n + 1 + (*n + - 1) * u_dim1], ldu); + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular -/* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) -*/ + N (input) INTEGER + The order of the matrix A. N >= 0. - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. 
If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the unitary + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the unitary matrix Q as a product + of elementary reflectors. See Further Details. - } + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - } else { + D (output) REAL array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). -/* - A has more columns than rows. If A has sufficiently more - columns than rows, first reduce using the LQ decomposition (if - sufficient workspace available) -*/ + E (output) REAL array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - if (*n >= mnthr) { + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). - if (wntqn) { + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. 
-/* - Path 1t (N much larger than M, JOBZ='N') - No singular vectors to be computed -*/ + Further Details + =============== - itau = 1; - nwork = itau + *m; + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors -/* - Compute A=L*Q - (Workspace: need 2*M, prefer M+M*NB) -*/ + Q = H(n-1) . . . H(2) H(1). - i__1 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); + Each H(i) has the form -/* Zero out above L */ + H(i) = I - tau * v * v' - i__1 = *m - 1; - i__2 = *m - 1; - dlaset_("U", &i__1, &i__2, &c_b29, &c_b29, &a[(a_dim1 << 1) + - 1], lda); - ie = 1; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; + where tau is a complex scalar, and v is a complex vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). -/* - Bidiagonalize L in A - (Workspace: need 4*M, prefer 3*M+2*M*NB) -*/ + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors - i__1 = *lwork - nwork + 1; - dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); - nwork = ie + *m; + Q = H(1) H(2) . . . H(n-1). -/* - Perform bidiagonal SVD, computing singular values only - (Workspace: need M+BDSPAC) -*/ + Each H(i) has the form - dbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); + H(i) = I - tau * v * v' - } else if (wntqo) { + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). 
-/* - Path 2t (N much larger than M, JOBZ='O') - M right singular vectors to be overwritten on A and - M left singular vectors to be computed in U -*/ + The contents of A on exit are illustrated by the following examples + with n = 5: - ivt = 1; + if UPLO = 'U': if UPLO = 'L': -/* IVT is M by M */ + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) - il = ivt + *m * *m; - if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). -/* WORK(IL) is M by N */ + ===================================================================== - ldwrkl = *m; - chunk = *n; - } else { - ldwrkl = *m; - chunk = (*lwork - *m * *m) / *m; - } - itau = il + ldwrkl * *m; - nwork = itau + *m; -/* - Compute A=L*Q - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) + Test the input parameters */ - i__1 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__1, &ierr); - -/* Copy L to WORK(IL), zeroing about above it */ - - dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); - i__1 = *m - 1; - i__2 = *m - 1; - dlaset_("U", &i__1, &i__2, &c_b29, &c_b29, &work[il + ldwrkl], - &ldwrkl); + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; -/* - Generate Q in A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! 
lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETD2", &i__1); + return 0; + } - i__1 = *lwork - nwork + 1; - dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__1, &ierr); - ie = itau; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; +/* Quick return if possible */ -/* - Bidiagonalize L in WORK(IL) - (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) -*/ + if (*n <= 0) { + return 0; + } - i__1 = *lwork - nwork + 1; - dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__1, &ierr); + if (upper) { -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U, and computing right singular - vectors of bidiagonal matrix in WORK(IVT) - (Workspace: need M+M*M+BDSPAC) -*/ +/* Reduce the upper triangle of A */ - dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & - work[ivt], m, dum, idum, &work[nwork], &iwork[1], - info); + i__1 = *n + *n * a_dim1; + i__2 = *n + *n * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + for (i__ = *n - 1; i__ >= 1; --i__) { /* - Overwrite U by left singular vectors of L and WORK(IVT) - by right singular vectors of L - (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(1:i-1,i+1) */ - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); + i__1 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__1].r, alpha.i = a[i__1].i; + clarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); + i__1 = i__; + e[i__1] = alpha.r; -/* - Multiply right singular vectors of L in WORK(IVT) by Q - in A, 
storing result in WORK(IL) and copying to A - (Workspace: need 2*M*M, prefer M*M+M*N) -*/ + if (taui.r != 0.f || taui.i != 0.f) { - i__1 = *n; - i__2 = chunk; - for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += - i__2) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - dgemm_("N", "N", m, &blk, m, &c_b15, &work[ivt], m, &a[ - i__ * a_dim1 + 1], lda, &c_b29, &work[il], & - ldwrkl); - dlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 - + 1], lda); -/* L30: */ - } +/* Apply H(i) from both sides to A(1:i,1:i) */ - } else if (wntqs) { + i__1 = i__ + (i__ + 1) * a_dim1; + a[i__1].r = 1.f, a[i__1].i = 0.f; -/* - Path 3t (N much larger than M, JOBZ='S') - M right singular vectors to be computed in VT and - M left singular vectors to be computed in U -*/ +/* Compute x := tau * A * v storing x in TAU(1:i) */ - il = 1; + chemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b55, &tau[1], &c__1) + ; -/* WORK(IL) is M by M */ +/* Compute w := x - 1/2 * tau * (x'*v) * v */ - ldwrkl = *m; - itau = il + ldwrkl * *m; - nwork = itau + *m; + q__3.r = -.5f, q__3.i = -0.f; + q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * + taui.i + q__3.i * taui.r; + cdotc_(&q__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1] + , &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + caxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); /* - Compute A=L*Q - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' */ - i__2 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - -/* Copy L to WORK(IL), zeroing out above it */ + q__1.r = -1.f, q__1.i = -0.f; + cher2_(uplo, &i__, &q__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, & + tau[1], &c__1, &a[a_offset], lda); - dlacpy_("L", m, m, &a[a_offset], lda, 
&work[il], &ldwrkl); - i__2 = *m - 1; - i__1 = *m - 1; - dlaset_("U", &i__2, &i__1, &c_b29, &c_b29, &work[il + ldwrkl], - &ldwrkl); + } else { + i__1 = i__ + i__ * a_dim1; + i__2 = i__ + i__ * a_dim1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + } + i__1 = i__ + (i__ + 1) * a_dim1; + i__2 = i__; + a[i__1].r = e[i__2], a[i__1].i = 0.f; + i__1 = i__ + 1; + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + d__[i__1] = a[i__2].r; + i__1 = i__; + tau[i__1].r = taui.r, tau[i__1].i = taui.i; +/* L10: */ + } + i__1 = a_dim1 + 1; + d__[1] = a[i__1].r; + } else { -/* - Generate Q in A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ +/* Reduce the lower triangle of A */ - i__2 = *lwork - nwork + 1; - dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], - &i__2, &ierr); - ie = itau; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; + i__1 = a_dim1 + 1; + i__2 = a_dim1 + 1; + r__1 = a[i__2].r; + a[i__1].r = r__1, a[i__1].i = 0.f; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { /* - Bidiagonalize L in WORK(IU), copying result to U - (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(i+2:n,i) */ - i__2 = *lwork - nwork + 1; - dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, & + taui); + i__2 = i__; + e[i__2] = alpha.r; -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need M+BDSPAC) -*/ + if (taui.r != 0.f || taui.i != 0.f) { - dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); +/* Apply H(i) from both sides to 
A(i+1:n,i+1:n) */ -/* - Overwrite U by left singular vectors of L and VT - by right singular vectors of L - (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) -*/ + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & - ierr); +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ -/* - Multiply right singular vectors of L in WORK(IL) by - Q in A, storing result in VT - (Workspace: need M*M) -*/ + i__2 = *n - i__; + chemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b55, &tau[ + i__], &c__1); - dlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); - dgemm_("N", "N", m, n, m, &c_b15, &work[il], &ldwrkl, &a[ - a_offset], lda, &c_b29, &vt[vt_offset], ldvt); +/* Compute w := x - 1/2 * tau * (x'*v) * v */ - } else if (wntqa) { + q__3.r = -.5f, q__3.i = -0.f; + q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r * + taui.i + q__3.i * taui.r; + i__2 = *n - i__; + cdotc_(&q__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * + a_dim1], &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + i__2 = *n - i__; + caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); /* - Path 4t (N much larger than M, JOBZ='A') - N right singular vectors to be computed in VT and - M left singular vectors to be computed in U + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' */ - ivt = 1; - -/* WORK(IVT) is M by M */ + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cher2_(uplo, &i__2, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1, + &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda); - ldwkvt = 
*m; - itau = ivt + ldwkvt * *m; - nwork = itau + *m; + } else { + i__2 = i__ + 1 + (i__ + 1) * a_dim1; + i__3 = i__ + 1 + (i__ + 1) * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } + i__2 = i__ + 1 + i__ * a_dim1; + i__3 = i__; + a[i__2].r = e[i__3], a[i__2].i = 0.f; + i__2 = i__; + i__3 = i__ + i__ * a_dim1; + d__[i__2] = a[i__3].r; + i__2 = i__; + tau[i__2].r = taui.r, tau[i__2].i = taui.i; +/* L20: */ + } + i__1 = *n; + i__2 = *n + *n * a_dim1; + d__[i__1] = a[i__2].r; + } -/* - Compute A=L*Q, copying result to VT - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ + return 0; - i__2 = *lwork - nwork + 1; - dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & - i__2, &ierr); - dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); +/* End of CHETD2 */ -/* - Generate Q in VT - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ +} /* chetd2_ */ - i__2 = *lwork - nwork + 1; - dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ - nwork], &i__2, &ierr); +/* Subroutine */ int chetrd_(char *uplo, integer *n, complex *a, integer *lda, + real *d__, real *e, complex *tau, complex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1; -/* Produce L in A, zeroing out other entries */ + /* Local variables */ + static integer i__, j, nb, kk, nx, iws; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + static logical upper; + extern /* Subroutine */ int chetd2_(char *, integer *, complex *, integer + *, real *, real *, complex *, integer *), cher2k_(char *, + char *, integer *, integer *, complex *, complex *, integer *, + complex *, integer *, real *, complex *, integer *), clatrd_(char *, integer *, integer *, complex *, integer + *, real *, complex *, complex *, integer *), xerbla_(char + *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + 
static integer ldwork, lwkopt; + static logical lquery; - i__2 = *m - 1; - i__1 = *m - 1; - dlaset_("U", &i__2, &i__1, &c_b29, &c_b29, &a[(a_dim1 << 1) + - 1], lda); - ie = itau; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; /* - Bidiagonalize L in A - (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) -*/ + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 - i__2 = *lwork - nwork + 1; - dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ - itauq], &work[itaup], &work[nwork], &i__2, &ierr); -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in WORK(IVT) - (Workspace: need M+M*M+BDSPAC) -*/ + Purpose + ======= - dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & - work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] - , info); + CHETRD reduces a complex Hermitian matrix A to real symmetric + tridiagonal form T by a unitary similarity transformation: + Q**H * A * Q = T. -/* - Overwrite U by left singular vectors of L and WORK(IVT) - by right singular vectors of L - (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) -*/ + Arguments + ========= - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & - ierr); + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. -/* - Multiply right singular vectors of L in WORK(IVT) by - Q in VT, storing result in A - (Workspace: need M*M) -*/ + N (input) INTEGER + The order of the matrix A. N >= 0. 
- dgemm_("N", "N", m, n, m, &c_b15, &work[ivt], &ldwkvt, &vt[ - vt_offset], ldvt, &c_b29, &a[a_offset], lda); + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the unitary + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the unitary matrix Q as a product + of elementary reflectors. See Further Details. -/* Copy right singular vectors of A from A to VT */ + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + D (output) REAL array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). - } + E (output) REAL array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - } else { + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). -/* - N .LT. MNTHR + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
- Path 5t (N greater than M, but not much larger) - Reduce to bidiagonal form without LQ decomposition -*/ + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. - ie = 1; - itauq = ie + *m; - itaup = itauq + *m; - nwork = itaup + *m; + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. -/* - Bidiagonalize A - (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) -*/ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value - i__2 = *lwork - nwork + 1; - dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & - work[itaup], &work[nwork], &i__2, &ierr); - if (wntqn) { + Further Details + =============== -/* - Perform bidiagonal SVD, only computing singular values - (Workspace: need M+BDSPAC) -*/ + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors - dbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, - dum, idum, &work[nwork], &iwork[1], info); - } else if (wntqo) { - ldwkvt = *m; - ivt = nwork; - if (*lwork >= *m * *n + *m * 3 + bdspac) { + Q = H(n-1) . . . H(2) H(1). -/* WORK( IVT ) is M by N */ + Each H(i) has the form - dlaset_("F", m, n, &c_b29, &c_b29, &work[ivt], &ldwkvt); - nwork = ivt + ldwkvt * *n; - } else { + H(i) = I - tau * v * v' -/* WORK( IVT ) is M by M */ + where tau is a complex scalar, and v is a complex vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). - nwork = ivt + ldwkvt * *m; - il = nwork; + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors -/* WORK(IL) is M by CHUNK */ + Q = H(1) H(2) . . . H(n-1). 
- chunk = (*lwork - *m * *m - *m * 3) / *m; - } + Each H(i) has the form -/* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in WORK(IVT) - (Workspace: need M*M+BDSPAC) -*/ + H(i) = I - tau * v * v' - dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & - work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] - , info); + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). -/* - Overwrite U by left singular vectors of A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters */ - i__2 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + --work; - if (*lwork >= *m * *n + *m * 3 + bdspac) { + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. 
*/ + + nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CHETRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { /* - Overwrite WORK(IVT) by left singular vectors of A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) -*/ + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). - i__2 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ - itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, - &ierr); + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "CHETRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *n) { -/* Copy right singular vectors of A from WORK(IVT) to A */ +/* Determine if workspace is large enough for blocked code. */ - dlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda); - } else { + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { /* - Generate P**T in A - (Workspace: need M*M+2*M, prefer M*M+M+M*NB) + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code by setting NX = N. 
+ + Computing MAX */ + i__1 = *lwork / ldwork; + nb = max(i__1,1); + nbmin = ilaenv_(&c__2, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } - i__2 = *lwork - nwork + 1; - dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & - work[nwork], &i__2, &ierr); + if (upper) { /* - Multiply Q in A by right singular vectors of - bidiagonal matrix in WORK(IVT), storing result in - WORK(IL) and copying to A - (Workspace: need 2*M*M, prefer M*M+M*N) + Reduce the upper triangle of A. + Columns 1:kk are handled by the unblocked method. */ - i__2 = *n; - i__1 = chunk; - for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += - i__1) { -/* Computing MIN */ - i__3 = *n - i__ + 1; - blk = min(i__3,chunk); - dgemm_("N", "N", m, &blk, m, &c_b15, &work[ivt], & - ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b29, & - work[il], m); - dlacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + - 1], lda); -/* L40: */ - } - } - } else if (wntqs) { + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += + i__2) { /* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need M+BDSPAC) + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix */ - dlaset_("F", m, n, &c_b29, &c_b29, &vt[vt_offset], ldvt); - dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); + i__3 = i__ + nb - 1; + clatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork); /* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need 3*M, prefer 2*M+M*NB) + Update the unreduced submatrix A(1:i-1,1:i-1), using an + update of the form: A := A - V*W' - W*V' */ - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } else if (wntqa) { + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ * a_dim1 + + 1], lda, &work[1], &ldwork, &c_b871, &a[a_offset], lda); /* - Perform bidiagonal SVD, computing left singular vectors - of bidiagonal matrix in U and computing right singular - vectors of bidiagonal matrix in VT - (Workspace: need M+BDSPAC) + Copy superdiagonal elements back into A, and diagonal + elements into D */ - dlaset_("F", n, n, &c_b29, &c_b29, &vt[vt_offset], ldvt); - dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ - vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], - info); + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j - 1 + j * a_dim1; + i__5 = j - 1; + a[i__4].r = e[i__5], a[i__4].i = 0.f; + i__4 = j; + 
i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L10: */ + } +/* L20: */ + } -/* Set the right corner of VT to identity matrix */ +/* Use unblocked code to reduce the last or only block */ - i__1 = *n - *m; - i__2 = *n - *m; - dlaset_("F", &i__1, &i__2, &c_b29, &c_b15, &vt[*m + 1 + (*m + - 1) * vt_dim1], ldvt); + chetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { /* - Overwrite U by left singular vectors of A and VT - by right singular vectors of A - (Workspace: need 2*M+N, prefer 2*M+N*NB) + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix */ - i__1 = *lwork - nwork + 1; - dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ - itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); - i__1 = *lwork - nwork + 1; - dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ - itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & - ierr); - } + i__3 = *n - i__ + 1; + clatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork); - } +/* + Update the unreduced submatrix A(i+nb:n,i+nb:n), using + an update of the form: A := A - V*W' - W*V' +*/ - } + i__3 = *n - i__ - nb + 1; + q__1.r = -1.f, q__1.i = -0.f; + cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b871, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda); -/* Undo scaling if necessary */ +/* + Copy subdiagonal elements back into A, and diagonal + elements into D +*/ - if (iscl == 1) { - if (anrm > bignum) { - dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); - } - if (anrm < smlnum) { - dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & - minmn, &ierr); + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + i__4 = j 
+ 1 + j * a_dim1; + i__5 = j; + a[i__4].r = e[i__5], a[i__4].i = 0.f; + i__4 = j; + i__5 = j + j * a_dim1; + d__[i__4] = a[i__5].r; +/* L30: */ + } +/* L40: */ } - } -/* Return optimal workspace in WORK(1) */ +/* Use unblocked code to reduce the last or only block */ - work[1] = (doublereal) maxwrk; + i__1 = *n - i__ + 1; + chetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo); + } + work[1].r = (real) lwkopt, work[1].i = 0.f; return 0; -/* End of DGESDD */ +/* End of CHETRD */ -} /* dgesdd_ */ +} /* chetrd_ */ -/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer - *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info) +/* Subroutine */ int chseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, complex *h__, integer *ldh, complex *w, complex *z__, + integer *ldz, complex *work, integer *lwork, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4[2], + i__5, i__6; + real r__1, r__2, r__3, r__4; + complex q__1; + char ch__1[2]; + + /* Builtin functions */ + double r_imag(complex *); + void r_cnjg(complex *, complex *); + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ - extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *), dgetrs_(char *, integer *, integer *, doublereal *, - integer *, integer *, doublereal *, integer *, integer *); + static integer i__, j, k, l; + static complex s[225] /* was [15][15] */, v[16]; + static integer i1, i2, ii, nh, nr, ns, nv; + static complex vv[16]; + static integer itn; + static complex tau; + static integer its; + static real ulp, tst1; + static integer maxb, ierr; + static real unfl; + static complex temp; + static real ovfl; + extern /* Subroutine */ int cscal_(integer *, complex 
*, complex *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *), ccopy_(integer *, complex *, integer *, + complex *, integer *); + static integer itemp; + static real rtemp; + static logical initz, wantt, wantz; + static real rwork[1]; + extern doublereal slapy2_(real *, real *); + extern /* Subroutine */ int slabad_(real *, real *), clarfg_(integer *, + complex *, complex *, integer *, complex *); + extern integer icamax_(integer *, complex *, integer *); + extern doublereal slamch_(char *), clanhs_(char *, integer *, + complex *, integer *, real *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), clahqr_(logical *, logical *, integer *, integer *, integer *, + complex *, integer *, complex *, integer *, integer *, complex *, + integer *, integer *), clacpy_(char *, integer *, integer *, + complex *, integer *, complex *, integer *), claset_(char + *, integer *, integer *, complex *, complex *, complex *, integer + *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int clarfx_(char *, integer *, integer *, complex + *, complex *, complex *, integer *, complex *); + static real smlnum; + static logical lquery; /* - -- LAPACK driver routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + June 30, 1999 Purpose ======= - DGESV computes the solution to a real system of linear equations - A * X = B, - where A is an N-by-N matrix and X and B are N-by-NRHS matrices. 
+ CHSEQR computes the eigenvalues of a complex upper Hessenberg + matrix H, and, optionally, the matrices T and Z from the Schur + decomposition H = Z T Z**H, where T is an upper triangular matrix + (the Schur form), and Z is the unitary matrix of Schur vectors. - The LU decomposition with partial pivoting and row interchanges is - used to factor A as - A = P * L * U, - where P is a permutation matrix, L is unit lower triangular, and U is - upper triangular. The factored form of A is then used to solve the - system of equations A * X = B. + Optionally Z may be postmultiplied into an input unitary matrix Q, + so that this routine can give the Schur factorization of a matrix A + which has been reduced to the Hessenberg form H by the unitary + matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. Arguments ========= + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an unitary matrix Q on entry, and + the product Q*Z is returned. + N (input) INTEGER - The number of linear equations, i.e., the order of the - matrix A. N >= 0. + The order of the matrix H. N >= 0. - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to CGEBAL, and then passed to CGEHRD + when the matrix output by CGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the N-by-N coefficient matrix A. 
- On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. + H (input/output) COMPLEX array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if JOB = 'S', H contains the upper triangular matrix + T from the Schur decomposition (the Schur form). If + JOB = 'E', the contents of H are unspecified on exit. - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). - IPIV (output) INTEGER array, dimension (N) - The pivot indices that define the permutation matrix P; - row i of the matrix was interchanged with row IPIV(i). + W (output) COMPLEX array, dimension (N) + The computed eigenvalues. If JOB = 'S', the eigenvalues are + stored in the same order as on the diagonal of the Schur form + returned in H, with W(i) = H(i,i). - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On entry, the N-by-NRHS matrix of right hand side matrix B. - On exit, if INFO = 0, the N-by-NRHS solution matrix X. + Z (input/output) COMPLEX array, dimension (LDZ,N) + If COMPZ = 'N': Z is not referenced. + If COMPZ = 'I': on entry, Z need not be set, and on exit, Z + contains the unitary matrix Z of the Schur vectors of H. + If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, + which is assumed to be equal to the unit matrix except for + the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. + Normally Q is the unitary matrix generated by CUNGHR after + the call to CGEHRD which formed the Hessenberg matrix H. - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). + LDZ (input) INTEGER + The leading dimension of the array Z. + LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. 
LWORK >= max(1,N). + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, so the solution could not be computed. + > 0: if INFO = i, CHSEQR failed to compute all the + eigenvalues in a total of 30*(IHI-ILO+1) iterations; + elements 1:ilo-1 and i+1:n of W contain those + eigenvalues which have been successfully computed. ===================================================================== - Test the input parameters. + Decode and test the input parameters */ /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; /* Function Body */ + wantt = lsame_(job, "S"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + *info = 0; - if (*n < 0) { + i__1 = max(1,*n); + work[1].r = (real) i__1, work[1].i = 0.f; + lquery = *lwork == -1; + if (! lsame_(job, "E") && ! wantt) { *info = -1; - } else if (*nrhs < 0) { + } else if (! lsame_(compz, "N") && ! wantz) { *info = -2; - } else if (*lda < max(1,*n)) { + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -4; - } else if (*ldb < max(1,*n)) { + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1,*n)) { *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { + *info = -10; + } else if (*lwork < max(1,*n) && ! 
lquery) { + *info = -12; } if (*info != 0) { i__1 = -(*info); - xerbla_("DGESV ", &i__1); + xerbla_("CHSEQR", &i__1); + return 0; + } else if (lquery) { return 0; } -/* Compute the LU factorization of A. */ - - dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); - if (*info == 0) { - -/* Solve the system A*X = B, overwriting B with X. */ +/* Initialize Z, if necessary */ - dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ - b_offset], ldb, info); + if (initz) { + claset_("Full", n, n, &c_b55, &c_b56, &z__[z_offset], ldz); } - return 0; -/* End of DGESV */ - -} /* dgesv_ */ +/* Store the eigenvalues isolated by CGEBAL. */ -/* Subroutine */ int dgetf2_(integer *m, integer *n, doublereal *a, integer * - lda, integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1; + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ + i__ * h_dim1; + w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; +/* L10: */ + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__ + i__ * h_dim1; + w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; +/* L20: */ + } - /* Local variables */ - static integer j, jp; - extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *), dscal_(integer *, doublereal *, doublereal *, integer - *), dswap_(integer *, doublereal *, integer *, doublereal *, - integer *); - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int xerbla_(char *, integer *); +/* Quick return if possible. */ + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + i__1 = *ilo; + i__2 = *ilo + *ilo * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + return 0; + } /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1992 - - - Purpose - ======= + Set rows and columns ILO to IHI to zero below the first + subdiagonal. +*/ - DGETF2 computes an LU factorization of a general m-by-n matrix A - using partial pivoting with row interchanges. + i__1 = *ihi - 2; + for (j = *ilo; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j + 2; i__ <= i__2; ++i__) { + i__3 = i__ + j * h_dim1; + h__[i__3].r = 0.f, h__[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + nh = *ihi - *ilo + 1; - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are re-set inside the main loop. +*/ - This is the right-looking Level 2 BLAS version of the algorithm. + if (wantt) { + i1 = 1; + i2 = *n; + } else { + i1 = *ilo; + i2 = *ihi; + } + +/* Ensure that the subdiagonal elements are real. 
*/ + + i__1 = *ihi; + for (i__ = *ilo + 1; i__ <= i__1; ++i__) { + i__2 = i__ + (i__ - 1) * h_dim1; + temp.r = h__[i__2].r, temp.i = h__[i__2].i; + if (r_imag(&temp) != 0.f) { + r__1 = temp.r; + r__2 = r_imag(&temp); + rtemp = slapy2_(&r__1, &r__2); + i__2 = i__ + (i__ - 1) * h_dim1; + h__[i__2].r = rtemp, h__[i__2].i = 0.f; + q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; + temp.r = q__1.r, temp.i = q__1.i; + if (i2 > i__) { + i__2 = i2 - i__; + r_cnjg(&q__1, &temp); + cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); + } + i__2 = i__ - i1; + cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); + if (i__ < *ihi) { + i__2 = i__ + 1 + i__ * h_dim1; + i__3 = i__ + 1 + i__ * h_dim1; + q__1.r = temp.r * h__[i__3].r - temp.i * h__[i__3].i, q__1.i = + temp.r * h__[i__3].i + temp.i * h__[i__3].r; + h__[i__2].r = q__1.r, h__[i__2].i = q__1.i; + } + if (wantz) { + cscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); + } + } +/* L50: */ + } - Arguments - ========= +/* + Determine the order of the multi-shift QR algorithm to be used. - M (input) INTEGER - The number of rows of the matrix A. M >= 0. + Writing concatenation +*/ + i__4[0] = 1, a__1[0] = job; + i__4[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); + ns = ilaenv_(&c__4, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); +/* Writing concatenation */ + i__4[0] = 1, a__1[0] = job; + i__4[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); + maxb = ilaenv_(&c__8, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (ns <= 1 || ns > nh || maxb >= nh) { - N (input) INTEGER - The number of columns of the matrix A. N >= 0. +/* Use the standard double-shift algorithm */ - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n matrix to be factored. - On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. 
+ clahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, + ihi, &z__[z_offset], ldz, info); + return 0; + } + maxb = max(2,maxb); +/* Computing MIN */ + i__1 = min(ns,maxb); + ns = min(i__1,15); - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). +/* + Now 1 < NS <= MAXB < NH. - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). + Set machine-dependent constants for the stopping criterion. + If norm(H) <= sqrt(OVFL), overflow should not occur. +*/ - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, U(k,k) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. + unfl = slamch_("Safe minimum"); + ovfl = 1.f / unfl; + slabad_(&unfl, &ovfl); + ulp = slamch_("Precision"); + smlnum = unfl * (nh / ulp); - ===================================================================== +/* ITN is the total number of multiple-shift QR iterations allowed. */ + itn = nh * 30; - Test the input parameters. +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of at most MAXB. Each iteration of the loop + works with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO, or + H(L,L-1) is negligible so that the matrix splits. 
*/ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGETF2", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; + i__ = *ihi; +L60: + if (i__ < *ilo) { + goto L180; } - i__1 = min(*m,*n); - for (j = 1; j <= i__1; ++j) { - -/* Find pivot and test for singularity. */ +/* + Perform multiple-shift QR iterations on rows and columns ILO to I + until a submatrix of order at most MAXB splits off at the bottom + because a subdiagonal element has become negligible. +*/ - i__2 = *m - j + 1; - jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1); - ipiv[j] = jp; - if (a[jp + j * a_dim1] != 0.) { + l = *ilo; + i__1 = itn; + for (its = 0; its <= i__1; ++its) { -/* Apply the interchange to columns 1:N. */ +/* Look for a single small subdiagonal element. */ - if (jp != j) { - dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + i__3 = k - 1 + (k - 1) * h_dim1; + i__5 = k + k * h_dim1; + tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[k - + 1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__5] + .r, dabs(r__3)) + (r__4 = r_imag(&h__[k + k * h_dim1]), + dabs(r__4))); + if (tst1 == 0.f) { + i__3 = i__ - l + 1; + tst1 = clanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); } - -/* Compute elements J+1:M of J-th column. */ - - if (j < *m) { - i__2 = *m - j; - d__1 = 1. 
/ a[j + j * a_dim1]; - dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + i__3 = k + (k - 1) * h_dim1; +/* Computing MAX */ + r__2 = ulp * tst1; + if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) { + goto L80; } - - } else if (*info == 0) { - - *info = j; +/* L70: */ } +L80: + l = k; + if (l > *ilo) { - if (j < min(*m,*n)) { - -/* Update trailing submatrix. */ +/* H(L,L-1) is negligible. */ - i__2 = *m - j; - i__3 = *n - j; - dger_(&i__2, &i__3, &c_b151, &a[j + 1 + j * a_dim1], &c__1, &a[j - + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], - lda); + i__2 = l + (l - 1) * h_dim1; + h__[i__2].r = 0.f, h__[i__2].i = 0.f; } -/* L10: */ - } - return 0; - -/* End of DGETF2 */ - -} /* dgetf2_ */ - -/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer * - lda, integer *ipiv, integer *info) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; - /* Local variables */ - static integer i__, j, jb, nb; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer iinfo; - extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *), dgetf2_( - integer *, integer *, doublereal *, integer *, integer *, integer - *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, - integer *, integer *, integer *, integer *); +/* Exit from loop if a submatrix of order <= MAXB has split off. */ + if (l >= i__ - maxb + 1) { + goto L170; + } /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + if (! wantt) { + i1 = l; + i2 = i__; + } - Purpose - ======= + if (its == 20 || its == 30) { - DGETRF computes an LU factorization of a general M-by-N matrix A - using partial pivoting with row interchanges. +/* Exceptional shifts. */ - The factorization has the form - A = P * L * U - where P is a permutation matrix, L is lower triangular with unit - diagonal elements (lower trapezoidal if m > n), and U is upper - triangular (upper trapezoidal if m < n). + i__2 = i__; + for (ii = i__ - ns + 1; ii <= i__2; ++ii) { + i__3 = ii; + i__5 = ii + (ii - 1) * h_dim1; + i__6 = ii + ii * h_dim1; + r__3 = ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = h__[i__6] + .r, dabs(r__2))) * 1.5f; + w[i__3].r = r__3, w[i__3].i = 0.f; +/* L90: */ + } + } else { - This is the right-looking Level 3 BLAS version of the algorithm. +/* Use eigenvalues of trailing submatrix of order NS as shifts. */ - Arguments - ========= + clacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * + h_dim1], ldh, s, &c__15); + clahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &w[i__ - + ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr); + if (ierr > 0) { - M (input) INTEGER - The number of rows of the matrix A. M >= 0. +/* + If CLAHQR failed to compute all NS eigenvalues, use the + unconverged diagonal elements as the remaining shifts. +*/ - N (input) INTEGER - The number of columns of the matrix A. N >= 0. + i__2 = ierr; + for (ii = 1; ii <= i__2; ++ii) { + i__3 = i__ - ns + ii; + i__5 = ii + ii * 15 - 16; + w[i__3].r = s[i__5].r, w[i__3].i = s[i__5].i; +/* L100: */ + } + } + } - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the M-by-N matrix to be factored. 
- On exit, the factors L and U from the factorization - A = P*L*U; the unit diagonal elements of L are not stored. +/* + Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) + where G is the Hessenberg submatrix H(L:I,L:I) and w is + the vector of shifts (stored in W). The result is + stored in the local array V. +*/ - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). + v[0].r = 1.f, v[0].i = 0.f; + i__2 = ns + 1; + for (ii = 2; ii <= i__2; ++ii) { + i__3 = ii - 1; + v[i__3].r = 0.f, v[i__3].i = 0.f; +/* L110: */ + } + nv = 1; + i__2 = i__; + for (j = i__ - ns + 1; j <= i__2; ++j) { + i__3 = nv + 1; + ccopy_(&i__3, v, &c__1, vv, &c__1); + i__3 = nv + 1; + i__5 = j; + q__1.r = -w[i__5].r, q__1.i = -w[i__5].i; + cgemv_("No transpose", &i__3, &nv, &c_b56, &h__[l + l * h_dim1], + ldh, vv, &c__1, &q__1, v, &c__1); + ++nv; + +/* + Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, + reset it to the unit vector. +*/ + + itemp = icamax_(&nv, v, &c__1); + i__3 = itemp - 1; + rtemp = (r__1 = v[i__3].r, dabs(r__1)) + (r__2 = r_imag(&v[itemp + - 1]), dabs(r__2)); + if (rtemp == 0.f) { + v[0].r = 1.f, v[0].i = 0.f; + i__3 = nv; + for (ii = 2; ii <= i__3; ++ii) { + i__5 = ii - 1; + v[i__5].r = 0.f, v[i__5].i = 0.f; +/* L120: */ + } + } else { + rtemp = dmax(rtemp,smlnum); + r__1 = 1.f / rtemp; + csscal_(&nv, &r__1, v, &c__1); + } +/* L130: */ + } - IPIV (output) INTEGER array, dimension (min(M,N)) - The pivot indices; for 1 <= i <= min(M,N), row i of the - matrix was interchanged with row IPIV(i). +/* Multiple-shift QR step */ - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, U(i,i) is exactly zero. The factorization - has been completed, but the factor U is exactly - singular, and division by zero will occur if it is used - to solve a system of equations. 
+ i__2 = i__ - 1; + for (k = l; k <= i__2; ++k) { - ===================================================================== +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. - Test the input parameters. + Computing MIN */ + i__3 = ns + 1, i__5 = i__ - k + 1; + nr = min(i__3,i__5); + if (k > l) { + ccopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + clarfg_(&nr, v, &v[1], &c__1, &tau); + if (k > l) { + i__3 = k + (k - 1) * h_dim1; + h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; + i__3 = i__; + for (ii = k + 1; ii <= i__3; ++ii) { + i__5 = ii + (k - 1) * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; +/* L140: */ + } + } + v[0].r = 1.f, v[0].i = 0.f; - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - - /* Function Body */ - *info = 0; - if (*m < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*lda < max(1,*m)) { - *info = -4; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGETRF", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*m == 0 || *n == 0) { - return 0; - } - -/* Determine the block size for this environment. */ - - nb = ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) - 1); - if (nb <= 1 || nb >= min(*m,*n)) { +/* + Apply G' from the left to transform the rows of the matrix + in columns K to I2. +*/ -/* Use unblocked code. 
*/ + i__3 = i2 - k + 1; + r_cnjg(&q__1, &tau); + clarfx_("Left", &nr, &i__3, v, &q__1, &h__[k + k * h_dim1], ldh, & + work[1]); - dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); - } else { +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+NR,I). -/* Use blocked code. */ + Computing MIN +*/ + i__5 = k + nr; + i__3 = min(i__5,i__) - i1 + 1; + clarfx_("Right", &i__3, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, + &work[1]); - i__1 = min(*m,*n); - i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { -/* Computing MIN */ - i__3 = min(*m,*n) - j + 1; - jb = min(i__3,nb); + if (wantz) { -/* - Factor diagonal and subdiagonal blocks and test for exact - singularity. -*/ +/* Accumulate transformations in the matrix Z */ - i__3 = *m - j + 1; - dgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + clarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], + ldz, &work[1]); + } +/* L150: */ + } -/* Adjust INFO and the pivot indices. */ +/* Ensure that H(I,I-1) is real. */ - if (*info == 0 && iinfo > 0) { - *info = iinfo + j - 1; + i__2 = i__ + (i__ - 1) * h_dim1; + temp.r = h__[i__2].r, temp.i = h__[i__2].i; + if (r_imag(&temp) != 0.f) { + r__1 = temp.r; + r__2 = r_imag(&temp); + rtemp = slapy2_(&r__1, &r__2); + i__2 = i__ + (i__ - 1) * h_dim1; + h__[i__2].r = rtemp, h__[i__2].i = 0.f; + q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; + temp.r = q__1.r, temp.i = q__1.i; + if (i2 > i__) { + i__2 = i2 - i__; + r_cnjg(&q__1, &temp); + cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); } -/* Computing MIN */ - i__4 = *m, i__5 = j + jb - 1; - i__3 = min(i__4,i__5); - for (i__ = j; i__ <= i__3; ++i__) { - ipiv[i__] = j - 1 + ipiv[i__]; -/* L10: */ + i__2 = i__ - i1; + cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); + if (wantz) { + cscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); } + } -/* Apply interchanges to columns 1:J-1. 
*/ +/* L160: */ + } - i__3 = j - 1; - i__4 = j + jb - 1; - dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); +/* Failure to converge in remaining number of iterations */ - if (j + jb <= *n) { + *info = i__; + return 0; -/* Apply interchanges to columns J+JB:N. */ +L170: - i__3 = *n - j - jb + 1; - i__4 = j + jb - 1; - dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & - ipiv[1], &c__1); +/* + A submatrix of order <= MAXB in rows and columns L to I has split + off. Use the double-shift QR algorithm to handle it. +*/ -/* Compute block row of U. */ + clahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &w[1], ilo, ihi, + &z__[z_offset], ldz, info); + if (*info > 0) { + return 0; + } - i__3 = *n - j - jb + 1; - dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & - c_b15, &a[j + j * a_dim1], lda, &a[j + (j + jb) * - a_dim1], lda); - if (j + jb <= *m) { +/* + Decrement number of remaining iterations, and return to start of + the main loop with a new value of I. +*/ -/* Update trailing submatrix. 
*/ + itn -= its; + i__ = l - 1; + goto L60; - i__3 = *m - j - jb + 1; - i__4 = *n - j - jb + 1; - dgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, - &c_b151, &a[j + jb + j * a_dim1], lda, &a[j + (j - + jb) * a_dim1], lda, &c_b15, &a[j + jb + (j + jb) - * a_dim1], lda); - } - } -/* L20: */ - } - } +L180: + i__1 = max(1,*n); + work[1].r = (real) i__1, work[1].i = 0.f; return 0; -/* End of DGETRF */ +/* End of CHSEQR */ -} /* dgetrf_ */ +} /* chseqr_ */ -/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs, - doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * - ldb, integer *info) +/* Subroutine */ int clabrd_(integer *m, integer *n, integer *nb, complex *a, + integer *lda, real *d__, real *e, complex *tauq, complex *taup, + complex *x, integer *ldx, complex *y, integer *ldy) { /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1; + integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, + i__3; + complex q__1; /* Local variables */ - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *), xerbla_( - char *, integer *), dlaswp_(integer *, doublereal *, - integer *, integer *, integer *, integer *, integer *); - static logical notran; + static integer i__; + static complex alpha; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), cgemv_(char *, integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, complex *, + integer *), clarfg_(integer *, complex *, complex *, + integer *, complex *), clacgv_(integer *, complex *, integer *); /* - -- LAPACK routine (version 3.0) -- + -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + September 30, 1994 Purpose ======= - DGETRS solves a system of linear equations - A * X = B or A' * X = B - with a general N-by-N matrix A using the LU factorization computed - by DGETRF. + CLABRD reduces the first NB rows and columns of a complex general + m by n matrix A to upper or lower real bidiagonal form by a unitary + transformation Q' * A * P, and returns the matrices X and Y which + are needed to apply the transformation to the unreduced part of A. + + If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + bidiagonal form. + + This is an auxiliary routine called by CGEBRD Arguments ========= - TRANS (input) CHARACTER*1 - Specifies the form of the system of equations: - = 'N': A * X = B (No transpose) - = 'T': A'* X = B (Transpose) - = 'C': A'* X = B (Conjugate transpose = Transpose) + M (input) INTEGER + The number of rows in the matrix A. N (input) INTEGER - The order of the matrix A. N >= 0. + The number of columns in the matrix A. - NRHS (input) INTEGER - The number of right hand sides, i.e., the number of columns - of the matrix B. NRHS >= 0. + NB (input) INTEGER + The number of leading rows and columns of A to be reduced. - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The factors L and U from the factorization A = P*L*U - as computed by DGETRF. + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, the first NB rows and columns of the matrix are + overwritten; the rest of the array is unchanged. + If m >= n, elements on and below the diagonal in the first NB + columns, with the array TAUQ, represent the unitary + matrix Q as a product of elementary reflectors; and + elements above the diagonal in the first NB rows, with the + array TAUP, represent the unitary matrix P as a product + of elementary reflectors. 
+ If m < n, elements below the diagonal in the first NB + columns, with the array TAUQ, represent the unitary + matrix Q as a product of elementary reflectors, and + elements on and above the diagonal in the first NB rows, + with the array TAUP, represent the unitary matrix P as + a product of elementary reflectors. + See Further Details. LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). + The leading dimension of the array A. LDA >= max(1,M). - IPIV (input) INTEGER array, dimension (N) - The pivot indices from DGETRF; for 1<=i<=N, row i of the - matrix was interchanged with row IPIV(i). + D (output) REAL array, dimension (NB) + The diagonal elements of the first NB rows and columns of + the reduced matrix. D(i) = A(i,i). - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On entry, the right hand side matrix B. - On exit, the solution matrix X. + E (output) REAL array, dimension (NB) + The off-diagonal elements of the first NB rows and columns of + the reduced matrix. - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,N). + TAUQ (output) COMPLEX array dimension (NB) + The scalar factors of the elementary reflectors which + represent the unitary matrix Q. See Further Details. - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value + TAUP (output) COMPLEX array, dimension (NB) + The scalar factors of the elementary reflectors which + represent the unitary matrix P. See Further Details. - ===================================================================== + X (output) COMPLEX array, dimension (LDX,NB) + The m-by-nb matrix X required to update the unreduced part + of A. + LDX (input) INTEGER + The leading dimension of the array X. LDX >= max(1,M). - Test the input parameters. -*/ + Y (output) COMPLEX array, dimension (LDY,NB) + The n-by-nb matrix Y required to update the unreduced part + of A. 
- /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --ipiv; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; + LDY (output) INTEGER + The leading dimension of the array Y. LDY >= max(1,N). + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are complex scalars, and v and u are complex + vectors. + + If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in + A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The elements of the vectors v and u together form the m-by-nb matrix + V and the nb-by-n matrix U' which are needed, with X and Y, to apply + the transformation to the unreduced part of the matrix, using a block + update of the form: A := A - V*Y' - X*U'. + + The contents of A on exit are illustrated by the following examples + with nb = 2: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) + ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) + ( v1 v2 a a a ) ( v1 1 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix which is unchanged, + vi denotes an element of the vector defining H(i), and ui an element + of the vector defining G(i). 
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; /* Function Body */ - *info = 0; - notran = lsame_(trans, "N"); - if (! notran && ! lsame_(trans, "T") && ! lsame_( - trans, "C")) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*nrhs < 0) { - *info = -3; - } else if (*lda < max(1,*n)) { - *info = -5; - } else if (*ldb < max(1,*n)) { - *info = -8; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DGETRS", &i__1); + if (*m <= 0 || *n <= 0) { return 0; } -/* Quick return if possible */ + if (*m >= *n) { - if (*n == 0 || *nrhs == 0) { - return 0; - } +/* Reduce to upper bidiagonal form */ - if (notran) { + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { -/* - Solve A * X = B. +/* Update A(i:m,i) */ - Apply row interchanges to the right hand sides. -*/ + i__2 = i__ - 1; + clacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda, + &y[i__ + y_dim1], ldy, &c_b56, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + x_dim1], ldx, + &a[i__ * a_dim1 + 1], &c__1, &c_b56, &a[i__ + i__ * + a_dim1], &c__1); - dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); +/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ -/* Solve L*X = B, overwriting B with X. 
*/ + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, & + tauq[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; - dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b15, &a[ - a_offset], lda, &b[b_offset], ldb); +/* Compute Y(i+1:n,i) */ -/* Solve U*X = B, overwriting B with X. */ + i__2 = *m - i__ + 1; + i__3 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + ( + i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], & + c__1, &c_b55, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b55, & + y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b56, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &x[i__ + + x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b55, & + y[i__ * y_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b15, & - a[a_offset], lda, &b[b_offset], ldb); +/* Update A(i,i+1:n) */ + + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + clacgv_(&i__, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__, &q__1, 
&y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b56, &a[i__ + + (i__ + 1) * a_dim1], lda); + clacgv_(&i__, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &x[i__ + x_dim1], ldx); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + + 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b56, + &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &x[i__ + x_dim1], ldx); + +/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + + i__2 = i__ + (i__ + 1) * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + (i__ + 1) * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ + 1 + ( + i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], + lda, &c_b55, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__, &c_b56, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b55, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__, &q__1, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b55, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + 
clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } +/* L10: */ + } } else { -/* - Solve A' * X = B. +/* Reduce to lower bidiagonal form */ - Solve U'*X = B, overwriting B with X. -*/ + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { - dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b15, &a[ - a_offset], lda, &b[b_offset], ldb); +/* Update A(i,i:n) */ -/* Solve L'*X = B, overwriting B with X. */ + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + y_dim1], ldy, + &a[i__ + a_dim1], lda, &c_b56, &a[i__ + i__ * a_dim1], + lda); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &x[i__ + x_dim1], ldx); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[i__ * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b56, &a[i__ + + i__ * a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &x[i__ + x_dim1], ldx); - dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b15, &a[ - a_offset], lda, &b[b_offset], ldb); +/* Generate reflection P(i) to annihilate A(i,i+1:n) */ -/* Apply row interchanges to the solution vectors. 
*/ + i__2 = i__ + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, & + taup[i__]); + i__2 = i__; + d__[i__2] = alpha.r; + if (i__ < *m) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; - dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); - } +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; + cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ + 1 + i__ + * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b55, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &y[i__ + + y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b55, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b55, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ + i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + +/* Update A(i+1:m,i) */ + + i__2 = i__ - 1; + clacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b56, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &y[i__ + y_dim1], ldy); + i__2 = *m - i__; + q__1.r = -1.f, 
q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__, &q__1, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b56, &a[ + i__ + 1 + i__ * a_dim1], &c__1); +/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, + &tauq[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * + a_dim1], &c__1, &c_b55, &y[i__ + 1 + i__ * y_dim1], & + c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b55, &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b56, &y[ + i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + cgemv_("Conjugate transpose", &i__2, &i__, &c_b56, &x[i__ + 1 + + x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b55, &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Conjugate transpose", &i__, &i__2, &q__1, &a[(i__ + 1) + * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + } else { + i__2 = *n - i__ + 1; + clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); + } +/* L20: */ + } + } return 0; -/* End of DGETRS */ +/* End of CLABRD */ -} /* dgetrs_ */ +} /* clabrd_ */ -/* Subroutine */ int dhseqr_(char *job, char *compz, integer *n, integer *ilo, - integer *ihi, doublereal *h__, integer *ldh, 
doublereal *wr, - doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, - integer *lwork, integer *info) +/* Subroutine */ int clacgv_(integer *n, complex *x, integer *incx) { /* System generated locals */ - address a__1[2]; - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4, - i__5; - doublereal d__1, d__2; - char ch__1[2]; + integer i__1, i__2; + complex q__1; /* Builtin functions */ - /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + void r_cnjg(complex *, complex *); /* Local variables */ - static integer i__, j, k, l; - static doublereal s[225] /* was [15][15] */, v[16]; - static integer i1, i2, ii, nh, nr, ns, nv; - static doublereal vv[16]; - static integer itn; - static doublereal tau; - static integer its; - static doublereal ulp, tst1; - static integer maxb; - static doublereal absw; - static integer ierr; - static doublereal unfl, temp, ovfl; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - static integer itemp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static logical initz, wantt, wantz; - extern doublereal dlapy2_(doublereal *, doublereal *); - extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); - - extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, - integer *, doublereal *); - extern integer idamax_(integer *, doublereal *, integer *); - extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, - doublereal *); - extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, integer *, doublereal *, integer *, - integer *), dlacpy_(char 
*, integer *, integer *, doublereal *, - integer *, doublereal *, integer *), dlaset_(char *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *), dlarfx_( - char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *, doublereal *); - static doublereal smlnum; - static logical lquery; + static integer i__, ioff; /* - -- LAPACK routine (version 3.0) -- + -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + October 31, 1992 Purpose ======= - DHSEQR computes the eigenvalues of a real upper Hessenberg matrix H - and, optionally, the matrices T and Z from the Schur decomposition - H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur - form), and Z is the orthogonal matrix of Schur vectors. - - Optionally Z may be postmultiplied into an input orthogonal matrix Q, - so that this routine can give the Schur factorization of a matrix A - which has been reduced to the Hessenberg form H by the orthogonal - matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + CLACGV conjugates a complex vector of length N. Arguments ========= - JOB (input) CHARACTER*1 - = 'E': compute eigenvalues only; - = 'S': compute eigenvalues and the Schur form T. - - COMPZ (input) CHARACTER*1 - = 'N': no Schur vectors are computed; - = 'I': Z is initialized to the unit matrix and the matrix Z - of Schur vectors of H is returned; - = 'V': Z must contain an orthogonal matrix Q on entry, and - the product Q*Z is returned. - N (input) INTEGER - The order of the matrix H. N >= 0. + The length of the vector X. N >= 0. - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. 
ILO and IHI are normally - set by a previous call to DGEBAL, and then passed to SGEHRD - when the matrix output by DGEBAL is reduced to Hessenberg - form. Otherwise ILO and IHI should be set to 1 and N - respectively. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + X (input/output) COMPLEX array, dimension + (1+(N-1)*abs(INCX)) + On entry, the vector of length N to be conjugated. + On exit, X is overwritten with conjg(X). - H (input/output) DOUBLE PRECISION array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if JOB = 'S', H contains the upper quasi-triangular - matrix T from the Schur decomposition (the Schur form); - 2-by-2 diagonal blocks (corresponding to complex conjugate - pairs of eigenvalues) are returned in standard form, with - H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E', - the contents of H are unspecified on exit. + INCX (input) INTEGER + The spacing between successive elements of X. - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). + ===================================================================== +*/ - WR (output) DOUBLE PRECISION array, dimension (N) - WI (output) DOUBLE PRECISION array, dimension (N) - The real and imaginary parts, respectively, of the computed - eigenvalues. If two eigenvalues are computed as a complex - conjugate pair, they are stored in consecutive elements of - WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and - WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the - same order as on the diagonal of the Schur form returned in - H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 - diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and - WI(i+1) = -WI(i). - Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) - If COMPZ = 'N': Z is not referenced. - If COMPZ = 'I': on entry, Z need not be set, and on exit, Z - contains the orthogonal matrix Z of the Schur vectors of H. 
- If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, - which is assumed to be equal to the unit matrix except for - the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. - Normally Q is the orthogonal matrix generated by DORGHR after - the call to DGEHRD which formed the Hessenberg matrix H. + /* Parameter adjustments */ + --x; - LDZ (input) INTEGER - The leading dimension of the array Z. - LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. + /* Function Body */ + if (*incx == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + r_cnjg(&q__1, &x[i__]); + x[i__2].r = q__1.r, x[i__2].i = q__1.i; +/* L10: */ + } + } else { + ioff = 1; + if (*incx < 0) { + ioff = 1 - (*n - 1) * *incx; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ioff; + r_cnjg(&q__1, &x[ioff]); + x[i__2].r = q__1.r, x[i__2].i = q__1.i; + ioff += *incx; +/* L20: */ + } + } + return 0; - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +/* End of CLACGV */ - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). +} /* clacgv_ */ - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. +/* Subroutine */ int clacp2_(char *uplo, integer *m, integer *n, real *a, + integer *lda, complex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, DHSEQR failed to compute all of the - eigenvalues in a total of 30*(IHI-ILO+1) iterations; - elements 1:ilo-1 and i+1:n of WR and WI contain those - eigenvalues which have been successfully computed. 
+ /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); - ===================================================================== +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 - Decode and test the input parameters -*/ - /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --wr; - --wi; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; + Purpose + ======= - /* Function Body */ - wantt = lsame_(job, "S"); - initz = lsame_(compz, "I"); - wantz = initz || lsame_(compz, "V"); + CLACP2 copies all or part of a real two-dimensional matrix A to a + complex matrix B. - *info = 0; - work[1] = (doublereal) max(1,*n); - lquery = *lwork == -1; - if (! lsame_(job, "E") && ! wantt) { - *info = -1; - } else if (! lsame_(compz, "N") && ! wantz) { - *info = -2; - } else if (*n < 0) { - *info = -3; - } else if (*ilo < 1 || *ilo > max(1,*n)) { - *info = -4; - } else if (*ihi < min(*ilo,*n) || *ihi > *n) { - *info = -5; - } else if (*ldh < max(1,*n)) { - *info = -7; - } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { - *info = -11; - } else if (*lwork < max(1,*n) && ! lquery) { - *info = -13; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DHSEQR", &i__1); - return 0; - } else if (lquery) { - return 0; - } + Arguments + ========= -/* Initialize Z, if necessary */ + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A - if (initz) { - dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); - } + M (input) INTEGER + The number of rows of the matrix A. M >= 0. -/* Store the eigenvalues isolated by DGEBAL. */ + N (input) INTEGER + The number of columns of the matrix A. N >= 0. 
- i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.; -/* L10: */ - } - i__1 = *n; - for (i__ = *ihi + 1; i__ <= i__1; ++i__) { - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.; -/* L20: */ - } + A (input) REAL array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper trapezium + is accessed; if UPLO = 'L', only the lower trapezium is + accessed. -/* Quick return if possible. */ + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - wr[*ilo] = h__[*ilo + *ilo * h_dim1]; - wi[*ilo] = 0.; - return 0; - } + B (output) COMPLEX array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. -/* - Set rows and columns ILO to IHI to zero below the first - subdiagonal. + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). + + ===================================================================== */ - i__1 = *ihi - 2; - for (j = *ilo; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j + 2; i__ <= i__2; ++i__) { - h__[i__ + j * h_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } - nh = *ihi - *ilo + 1; -/* - Determine the order of the multi-shift QR algorithm to be used. 
+ /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; - Writing concatenation -*/ - i__3[0] = 1, a__1[0] = job; - i__3[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - ns = ilaenv_(&c__4, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = job; - i__3[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - maxb = ilaenv_(&c__8, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (ns <= 2 || ns > nh || maxb >= nh) { + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.f; +/* L10: */ + } +/* L20: */ + } -/* Use the standard double-shift algorithm */ + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } - dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ - 1], ilo, ihi, &z__[z_offset], ldz, info); - return 0; + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4], b[i__3].i = 0.f; +/* L50: */ + } +/* L60: */ + } } - maxb = max(3,maxb); -/* Computing MIN */ - i__1 = min(ns,maxb); - ns = min(i__1,15); - -/* - Now 2 < NS <= MAXB < NH. - - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - unfl = SAFEMINIMUM; - ovfl = 1. 
/ unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (nh / ulp); -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. -*/ + return 0; - if (wantt) { - i1 = 1; - i2 = *n; - } +/* End of CLACP2 */ -/* ITN is the total number of multiple-shift QR iterations allowed. */ +} /* clacp2_ */ - itn = nh * 30; +/* Subroutine */ int clacpy_(char *uplo, integer *m, integer *n, complex *a, + integer *lda, complex *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of at most MAXB. Each iteration of the loop - works with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO or - H(L,L-1) is negligible so that the matrix splits. -*/ + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); - i__ = *ihi; -L50: - l = *ilo; - if (i__ < *ilo) { - goto L170; - } /* - Perform multiple-shift QR iterations on rows and columns ILO to I - until a submatrix of order at most MAXB splits off at the bottom - because a subdiagonal element has become negligible. -*/ - - i__1 = itn; - for (its = 0; its <= i__1; ++its) { + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 -/* Look for a single small subdiagonal element. */ - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = - h__[k + k * h_dim1], abs(d__2)); - if (tst1 == 0.) 
{ - i__4 = i__ - l + 1; - tst1 = dlanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1] - ); - } -/* Computing MAX */ - d__2 = ulp * tst1; - if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, - smlnum)) { - goto L70; - } -/* L60: */ - } -L70: - l = k; - if (l > *ilo) { + Purpose + ======= -/* H(L,L-1) is negligible. */ + CLACPY copies all or part of a two-dimensional matrix A to another + matrix B. - h__[l + (l - 1) * h_dim1] = 0.; - } + Arguments + ========= -/* Exit from loop if a submatrix of order <= MAXB has split off. */ + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A - if (l >= i__ - maxb + 1) { - goto L160; - } + M (input) INTEGER + The number of rows of the matrix A. M >= 0. -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ + N (input) INTEGER + The number of columns of the matrix A. N >= 0. - if (! wantt) { - i1 = l; - i2 = i__; - } + A (input) COMPLEX array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper trapezium + is accessed; if UPLO = 'L', only the lower trapezium is + accessed. - if (its == 20 || its == 30) { + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). -/* Exceptional shifts. */ + B (output) COMPLEX array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. - i__2 = i__; - for (ii = i__ - ns + 1; ii <= i__2; ++ii) { - wr[ii] = ((d__1 = h__[ii + (ii - 1) * h_dim1], abs(d__1)) + ( - d__2 = h__[ii + ii * h_dim1], abs(d__2))) * 1.5; - wi[ii] = 0.; -/* L80: */ - } - } else { + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). -/* Use eigenvalues of trailing submatrix of order NS as shifts. 
*/ + ===================================================================== +*/ - dlacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * - h_dim1], ldh, s, &c__15); - dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ - - ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset], - ldz, &ierr); - if (ierr > 0) { -/* - If DLAHQR failed to compute all NS eigenvalues, use the - unconverged diagonal elements as the remaining shifts. -*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; - i__2 = ierr; - for (ii = 1; ii <= i__2; ++ii) { - wr[i__ - ns + ii] = s[ii + ii * 15 - 16]; - wi[i__ - ns + ii] = 0.; -/* L90: */ - } + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L10: */ } +/* L20: */ } -/* - Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) - where G is the Hessenberg submatrix H(L:I,L:I) and w is - the vector of shifts (stored in WR and WI). The result is - stored in the local array V. -*/ - - v[0] = 1.; - i__2 = ns + 1; - for (ii = 2; ii <= i__2; ++ii) { - v[ii - 1] = 0.; -/* L100: */ + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L30: */ + } +/* L40: */ } - nv = 1; - i__2 = i__; - for (j = i__ - ns + 1; j <= i__2; ++j) { - if (wi[j] >= 0.) { - if (wi[j] == 0.) 
{ -/* real shift */ + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + i__4 = i__ + j * a_dim1; + b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i; +/* L50: */ + } +/* L60: */ + } + } - i__4 = nv + 1; - dcopy_(&i__4, v, &c__1, vv, &c__1); - i__4 = nv + 1; - d__1 = -wr[j]; - dgemv_("No transpose", &i__4, &nv, &c_b15, &h__[l + l * - h_dim1], ldh, vv, &c__1, &d__1, v, &c__1); - ++nv; - } else if (wi[j] > 0.) { + return 0; -/* complex conjugate pair of shifts */ +/* End of CLACPY */ - i__4 = nv + 1; - dcopy_(&i__4, v, &c__1, vv, &c__1); - i__4 = nv + 1; - d__1 = wr[j] * -2.; - dgemv_("No transpose", &i__4, &nv, &c_b15, &h__[l + l * - h_dim1], ldh, v, &c__1, &d__1, vv, &c__1); - i__4 = nv + 1; - itemp = idamax_(&i__4, vv, &c__1); -/* Computing MAX */ - d__2 = (d__1 = vv[itemp - 1], abs(d__1)); - temp = 1. / max(d__2,smlnum); - i__4 = nv + 1; - dscal_(&i__4, &temp, vv, &c__1); - absw = dlapy2_(&wr[j], &wi[j]); - temp = temp * absw * absw; - i__4 = nv + 2; - i__5 = nv + 1; - dgemv_("No transpose", &i__4, &i__5, &c_b15, &h__[l + l * - h_dim1], ldh, vv, &c__1, &temp, v, &c__1); - nv += 2; - } +} /* clacpy_ */ -/* - Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, - reset it to the unit vector. -*/ +/* Subroutine */ int clacrm_(integer *m, integer *n, complex *a, integer *lda, + real *b, integer *ldb, complex *c__, integer *ldc, real *rwork) +{ + /* System generated locals */ + integer b_dim1, b_offset, a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5; + real r__1; + complex q__1; - itemp = idamax_(&nv, v, &c__1); - temp = (d__1 = v[itemp - 1], abs(d__1)); - if (temp == 0.) { - v[0] = 1.; - i__4 = nv; - for (ii = 2; ii <= i__4; ++ii) { - v[ii - 1] = 0.; -/* L110: */ - } - } else { - temp = max(temp,smlnum); - d__1 = 1. 
/ temp; - dscal_(&nv, &d__1, v, &c__1); - } - } -/* L120: */ - } + /* Builtin functions */ + double r_imag(complex *); -/* Multiple-shift QR step */ + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); - i__2 = i__ - 1; - for (k = l; k <= i__2; ++k) { /* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. - Computing MIN -*/ - i__4 = ns + 1, i__5 = i__ - k + 1; - nr = min(i__4,i__5); - if (k > l) { - dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - dlarfg_(&nr, v, &v[1], &c__1, &tau); - if (k > l) { - h__[k + (k - 1) * h_dim1] = v[0]; - i__4 = i__; - for (ii = k + 1; ii <= i__4; ++ii) { - h__[ii + (k - 1) * h_dim1] = 0.; -/* L130: */ - } - } - v[0] = 1.; + Purpose + ======= -/* - Apply G from the left to transform the rows of the matrix in - columns K to I2. -*/ + CLACRM performs a very simple matrix-matrix multiplication: + C := A * B, + where A is M by N and complex; B is N by N and real; + C is M by N and complex. - i__4 = i2 - k + 1; - dlarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, & - work[1]); + Arguments + ========= -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+NR,I). + M (input) INTEGER + The number of rows of the matrix A and of the matrix C. + M >= 0. 
- Computing MIN -*/ - i__5 = k + nr; - i__4 = min(i__5,i__) - i1 + 1; - dlarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, - &work[1]); + N (input) INTEGER + The number of columns and rows of the matrix B and + the number of columns of the matrix C. + N >= 0. - if (wantz) { + A (input) COMPLEX array, dimension (LDA, N) + A contains the M by N matrix A. -/* Accumulate transformations in the matrix Z */ + LDA (input) INTEGER + The leading dimension of the array A. LDA >=max(1,M). - dlarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], - ldz, &work[1]); - } -/* L140: */ - } + B (input) REAL array, dimension (LDB, N) + B contains the N by N matrix B. -/* L150: */ - } + LDB (input) INTEGER + The leading dimension of the array B. LDB >=max(1,N). -/* Failure to converge in remaining number of iterations */ + C (input) COMPLEX array, dimension (LDC, N) + C contains the M by N matrix C. - *info = i__; - return 0; + LDC (input) INTEGER + The leading dimension of the array C. LDC >=max(1,N). -L160: + RWORK (workspace) REAL array, dimension (2*M*N) -/* - A submatrix of order <= MAXB in rows and columns L to I has split - off. Use the double-shift QR algorithm to handle it. + ===================================================================== + + + Quick return if possible. */ - dlahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1], - ilo, ihi, &z__[z_offset], ldz, info); - if (*info > 0) { + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --rwork; + + /* Function Body */ + if (*m == 0 || *n == 0) { return 0; } -/* - Decrement number of remaining iterations, and return to start of - the main loop with a new value of I. 
-*/ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + rwork[(j - 1) * *m + i__] = a[i__3].r; +/* L10: */ + } +/* L20: */ + } - itn -= its; - i__ = l - 1; - goto L50; + l = *m * *n + 1; + sgemm_("N", "N", m, n, n, &c_b871, &rwork[1], m, &b[b_offset], ldb, & + c_b1101, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = l + (j - 1) * *m + i__ - 1; + c__[i__3].r = rwork[i__4], c__[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + rwork[(j - 1) * *m + i__] = r_imag(&a[i__ + j * a_dim1]); +/* L50: */ + } +/* L60: */ + } + sgemm_("N", "N", m, n, n, &c_b871, &rwork[1], m, &b[b_offset], ldb, & + c_b1101, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + r__1 = c__[i__4].r; + i__5 = l + (j - 1) * *m + i__ - 1; + q__1.r = r__1, q__1.i = rwork[i__5]; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L70: */ + } +/* L80: */ + } -L170: - work[1] = (doublereal) max(1,*n); return 0; -/* End of DHSEQR */ +/* End of CLACRM */ -} /* dhseqr_ */ +} /* clacrm_ */ -/* Subroutine */ int dlabad_(doublereal *small, doublereal *large) +/* Complex */ VOID cladiv_(complex * ret_val, complex *x, complex *y) { + /* System generated locals */ + real r__1, r__2, r__3, r__4; + complex q__1; + /* Builtin functions */ - double d_lg10(doublereal *), sqrt(doublereal); + double r_imag(complex *); + + /* Local variables */ + static real zi, zr; + extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real * + , real *); /* @@ -8006,877 +8767,844 @@ L170: Purpose ======= - DLABAD takes as input the values computed by DLAMCH for underflow and - overflow, and returns the square root of each of these values if the - 
log of LARGE is sufficiently large. This subroutine is intended to - identify machines with a large exponent range, such as the Crays, and - redefine the underflow and overflow limits to be the square roots of - the values computed by DLAMCH. This subroutine is needed because - DLAMCH does not compensate for poor arithmetic in the upper half of - the exponent range, as is found on a Cray. + CLADIV := X / Y, where X and Y are complex. The computation of X / Y + will not overflow on an intermediary step unless the results + overflows. Arguments ========= - SMALL (input/output) DOUBLE PRECISION - On entry, the underflow threshold as computed by DLAMCH. - On exit, if LOG10(LARGE) is sufficiently large, the square - root of SMALL, otherwise unchanged. - - LARGE (input/output) DOUBLE PRECISION - On entry, the overflow threshold as computed by DLAMCH. - On exit, if LOG10(LARGE) is sufficiently large, the square - root of LARGE, otherwise unchanged. + X (input) COMPLEX + Y (input) COMPLEX + The complex scalars X and Y. ===================================================================== - - - If it looks like we're on a Cray, take the square root of - SMALL and LARGE to avoid overflow and underflow problems. 
*/ - if (d_lg10(large) > 2e3) { - *small = sqrt(*small); - *large = sqrt(*large); - } - return 0; + r__1 = x->r; + r__2 = r_imag(x); + r__3 = y->r; + r__4 = r_imag(y); + sladiv_(&r__1, &r__2, &r__3, &r__4, &zr, &zi); + q__1.r = zr, q__1.i = zi; + ret_val->r = q__1.r, ret_val->i = q__1.i; -/* End of DLABAD */ + return ; -} /* dlabad_ */ +/* End of CLADIV */ -/* Subroutine */ int dlabrd_(integer *m, integer *n, integer *nb, doublereal * - a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, - doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer - *ldy) +} /* cladiv_ */ + +/* Subroutine */ int claed0_(integer *qsiz, integer *n, real *d__, real *e, + complex *q, integer *ldq, complex *qstore, integer *ldqs, real *rwork, + integer *iwork, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, - i__3; + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + real r__1; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); /* Local variables */ - static integer i__; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dgemv_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *); + static integer i__, j, k, ll, iq, lgn, msd2, smm1, spm1, spm2; + static real temp; + static integer curr, iperm; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *); + static integer indxq, iwrem; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static integer iqptr; + extern /* Subroutine */ int claed7_(integer *, integer *, integer *, + integer *, integer *, integer *, real *, complex *, integer *, + real *, integer *, real *, integer *, integer *, integer *, + integer *, 
integer *, real *, complex *, real *, integer *, + integer *); + static integer tlvls; + extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, + integer *, real *, integer *, complex *, integer *, real *); + static integer igivcl; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer igivnm, submat, curprb, subpbs, igivpt, curlvl, matsiz, + iprmpt, smlsiz; + extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, + real *, integer *, real *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + September 30, 1994 Purpose ======= - DLABRD reduces the first NB rows and columns of a real general - m by n matrix A to upper or lower bidiagonal form by an orthogonal - transformation Q' * A * P, and returns the matrices X and Y which - are needed to apply the transformation to the unreduced part of A. - - If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower - bidiagonal form. - - This is an auxiliary routine called by DGEBRD + Using the divide and conquer method, CLAED0 computes all eigenvalues + of a symmetric tridiagonal matrix which is one diagonal block of + those from reducing a dense or band Hermitian matrix and + corresponding eigenvectors of the dense or band matrix. Arguments ========= - M (input) INTEGER - The number of rows in the matrix A. - - N (input) INTEGER - The number of columns in the matrix A. - - NB (input) INTEGER - The number of leading rows and columns of A to be reduced. - - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - On entry, the m by n general matrix to be reduced. 
- On exit, the first NB rows and columns of the matrix are - overwritten; the rest of the array is unchanged. - If m >= n, elements on and below the diagonal in the first NB - columns, with the array TAUQ, represent the orthogonal - matrix Q as a product of elementary reflectors; and - elements above the diagonal in the first NB rows, with the - array TAUP, represent the orthogonal matrix P as a product - of elementary reflectors. - If m < n, elements below the diagonal in the first NB - columns, with the array TAUQ, represent the orthogonal - matrix Q as a product of elementary reflectors, and - elements on and above the diagonal in the first NB rows, - with the array TAUP, represent the orthogonal matrix P as - a product of elementary reflectors. - See Further Details. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). - - D (output) DOUBLE PRECISION array, dimension (NB) - The diagonal elements of the first NB rows and columns of - the reduced matrix. D(i) = A(i,i). - - E (output) DOUBLE PRECISION array, dimension (NB) - The off-diagonal elements of the first NB rows and columns of - the reduced matrix. - - TAUQ (output) DOUBLE PRECISION array dimension (NB) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix Q. See Further Details. - - TAUP (output) DOUBLE PRECISION array, dimension (NB) - The scalar factors of the elementary reflectors which - represent the orthogonal matrix P. See Further Details. - - X (output) DOUBLE PRECISION array, dimension (LDX,NB) - The m-by-nb matrix X required to update the unreduced part - of A. - - LDX (input) INTEGER - The leading dimension of the array X. LDX >= M. - - Y (output) DOUBLE PRECISION array, dimension (LDY,NB) - The n-by-nb matrix Y required to update the unreduced part - of A. - - LDY (output) INTEGER - The leading dimension of the array Y. LDY >= N. 
- - Further Details - =============== - - The matrices Q and P are represented as products of elementary - reflectors: - - Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. - Each H(i) and G(i) has the form: + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. - H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + D (input/output) REAL array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, the eigenvalues in ascending order. - where tauq and taup are real scalars, and v and u are real vectors. + E (input/output) REAL array, dimension (N-1) + On entry, the off-diagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. - If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in - A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + Q (input/output) COMPLEX array, dimension (LDQ,N) + On entry, Q must contain an QSIZ x N matrix whose columns + unitarily orthonormal. It is a part of the unitary matrix + that reduces the full dense Hermitian matrix to a + (reducible) symmetric tridiagonal matrix. - If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in - A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in - A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). - The elements of the vectors v and u together form the m-by-nb matrix - V and the nb-by-n matrix U' which are needed, with X and Y, to apply - the transformation to the unreduced part of the matrix, using a block - update of the form: A := A - V*Y' - X*U'. 
+ IWORK (workspace) INTEGER array, + the dimension of IWORK must be at least + 6 + 6*N + 5*N*lg N + ( lg( N ) = smallest integer k + such that 2^k >= N ) - The contents of A on exit are illustrated by the following examples - with nb = 2: + RWORK (workspace) REAL array, + dimension (1 + 3*N + 2*N*lg N + 3*N**2) + ( lg( N ) = smallest integer k + such that 2^k >= N ) - m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + QSTORE (workspace) COMPLEX array, dimension (LDQS, N) + Used to store parts of + the eigenvector matrix when the updating matrix multiplies + take place. - ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) - ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) - ( v1 v2 a a a ) ( v1 1 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) ( v1 v2 a a a a ) - ( v1 v2 a a a ) + LDQS (input) INTEGER + The leading dimension of the array QSTORE. + LDQS >= max(1,N). - where a denotes an element of the original matrix which is unchanged, - vi denotes an element of the vector defining H(i), and ui an element - of the vector defining G(i). + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). ===================================================================== + Warning: N could be as big as QSIZ! - Quick return if possible + + Test the input parameters. */ /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; --d__; --e; - --tauq; - --taup; - x_dim1 = *ldx; - x_offset = 1 + x_dim1; - x -= x_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --rwork; + --iwork; /* Function Body */ - if (*m <= 0 || *n <= 0) { + *info = 0; + +/* + IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 
2 ) THEN + INFO = -1 + ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) ) + $ THEN +*/ + if (*qsiz < max(0,*n)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else if (*ldqs < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLAED0", &i__1); return 0; } - if (*m >= *n) { - -/* Reduce to upper bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { +/* Quick return if possible */ -/* Update A(i:m,i) */ + if (*n == 0) { + return 0; + } - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + a_dim1], - lda, &y[i__ + y_dim1], ldy, &c_b15, &a[i__ + i__ * a_dim1] - , &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + x_dim1], - ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b15, &a[i__ + i__ * - a_dim1], &c__1); + smlsiz = ilaenv_(&c__9, "CLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); -/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ +/* + Determine the size and placement of the submatrices, and save in + the leading elements of IWORK. +*/ - i__2 = *m - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * - a_dim1], &c__1, &tauq[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - if (i__ < *n) { - a[i__ + i__ * a_dim1] = 1.; + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: */ + } -/* Compute Y(i+1:n,i) */ +/* + Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 + using rank-1 modifications (cuts). 
+*/ - i__2 = *m - i__ + 1; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + (i__ + 1) * - a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b29, - &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + a_dim1], - lda, &a[i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b15, &y[ - i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b15, &x[i__ + x_dim1], - ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) * - a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b15, - &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *n - i__; - dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (r__1 = e[smm1], dabs(r__1)); + d__[submat] -= (r__1 = e[smm1], dabs(r__1)); +/* L40: */ + } -/* Update A(i,i+1:n) */ + indxq = (*n << 2) + 3; - i__2 = *n - i__; - dgemv_("No transpose", &i__2, &i__, &c_b151, &y[i__ + 1 + - y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + - (i__ + 1) * a_dim1], lda); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(i__ + 1) * - a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b15, &a[ - i__ + (i__ + 1) * a_dim1], lda); +/* + Set up workspaces for eigenvalues only/accumulate new vectors + routine +*/ -/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + temp = log((real) (*n)) / log(2.f); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = 
iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; - i__2 = *n - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( - i__3,*n) * a_dim1], lda, &taup[i__]); - e[i__] = a[i__ + (i__ + 1) * a_dim1]; - a[i__ + (i__ + 1) * a_dim1] = 1.; + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; +/* Initialize pointers */ + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; -/* Compute X(i+1:m,i) */ +/* + Solve each submatrix eigenproblem at the bottom of the divide and + conquer tree. +*/ - i__2 = *m - i__; - i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + ( - i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], - lda, &c_b29, &x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__; - dgemv_("Transpose", &i__2, &i__, &c_b15, &y[i__ + 1 + y_dim1], - ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b29, &x[ - i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - dgemv_("No transpose", &i__2, &i__, &c_b151, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * - a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b29, &x[i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - i__; - dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); - } -/* L10: */ + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; } - } else { - -/* Reduce 
to lower bidiagonal form */ - - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - -/* Update A(i,i:n) */ - - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + y_dim1], - ldy, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1] - , lda); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[i__ * a_dim1 + 1], - lda, &x[i__ + x_dim1], ldx, &c_b15, &a[i__ + i__ * a_dim1] - , lda); - -/* Generate reflection P(i) to annihilate A(i,i+1:n) */ + ll = iq - 1 + iwork[iqptr + curr]; + ssteqr_("I", &matsiz, &d__[submat], &e[submat], &rwork[ll], &matsiz, & + rwork[1], info); + clacrm_(qsiz, &matsiz, &q[submat * q_dim1 + 1], ldq, &rwork[ll], & + matsiz, &qstore[submat * qstore_dim1 + 1], ldqs, &rwork[iwrem] + ); +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + if (*info > 0) { + *info = submat * (*n + 1) + submat + matsiz - 1; + return 0; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: */ + } +/* L70: */ + } - i__2 = *n - i__ + 1; -/* Computing MIN */ - i__3 = i__ + 1; - dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * - a_dim1], lda, &taup[i__]); - d__[i__] = a[i__ + i__ * a_dim1]; - if (i__ < *m) { - a[i__ + i__ * a_dim1] = 1.; +/* + Successively merge eigensystems of adjacent submatrices + into eigensystem for the corresponding larger matrix. 
-/* Compute X(i+1:m,i) */ + while ( SUBPBS > 1 ) +*/ - i__2 = *m - i__; - i__3 = *n - i__ + 1; - dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + i__ - * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b29, & - x[i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b15, &y[i__ + y_dim1], - ldy, &a[i__ + i__ * a_dim1], lda, &c_b29, &x[i__ * - x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - dgemv_("No transpose", &i__2, &i__3, &c_b15, &a[i__ * a_dim1 - + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b29, &x[ - i__ * x_dim1 + 1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b15, &x[ - i__ + 1 + i__ * x_dim1], &c__1); - i__2 = *m - i__; - dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } -/* Update A(i+1:m,i) */ +/* + Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) + into an eigensystem of size MATSIZ. CLAED7 handles the case + when the eigenvectors of a full or band Hermitian matrix (which + was reduced to tridiagonal form) are desired. 
- i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + - a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b15, &a[i__ + - 1 + i__ * a_dim1], &c__1); - i__2 = *m - i__; - dgemv_("No transpose", &i__2, &i__, &c_b151, &x[i__ + 1 + - x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b15, &a[ - i__ + 1 + i__ * a_dim1], &c__1); + I am free to use Q as a valuable working space until Loop 150. +*/ -/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + claed7_(&matsiz, &msd2, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, &e[ + submat + msd2 - 1], &iwork[indxq + submat], &rwork[iq], & + iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[ + igivpt], &iwork[igivcl], &rwork[igivnm], &q[submat * + q_dim1 + 1], &rwork[iwrem], &iwork[subpbs + 1], info); + if (*info > 0) { + *info = submat * (*n + 1) + submat + matsiz - 1; + return 0; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } - i__2 = *m - i__; -/* Computing MIN */ - i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + - i__ * a_dim1], &c__1, &tauq[i__]); - e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.; +/* + end while -/* Compute Y(i+1:n,i) */ + Re-merge the eigenvalues/vectors which were deflated at the final + merge step. 
+*/ - i__2 = *m - i__; - i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + (i__ + - 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, - &c_b29, &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] - , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &y[ - i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b15, &y[ - i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *m - i__; - dgemv_("Transpose", &i__2, &i__, &c_b15, &x[i__ + 1 + x_dim1], - ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &y[ - i__ * y_dim1 + 1], &c__1); - i__2 = *n - i__; - dgemv_("Transpose", &i__, &i__2, &c_b151, &a[(i__ + 1) * - a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &c_b15, - &y[i__ + 1 + i__ * y_dim1], &c__1); - i__2 = *n - i__; - dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); - } -/* L20: */ - } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + rwork[i__] = d__[j]; + ccopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1] + , &c__1); +/* L100: */ } + scopy_(n, &rwork[1], &c__1, &d__[1], &c__1); + return 0; -/* End of DLABRD */ +/* End of CLAED0 */ -} /* dlabrd_ */ +} /* claed0_ */ -/* Subroutine */ int dlacpy_(char *uplo, integer *m, integer *n, doublereal * - a, integer *lda, doublereal *b, integer *ldb) +/* Subroutine */ int claed7_(integer *n, integer *cutpnt, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, real *d__, complex * + q, integer *ldq, real *rho, integer *indxq, real *qstore, integer * + qptr, integer *prmptr, integer *perm, integer *givptr, integer * + givcol, real *givnum, complex *work, real *rwork, integer *iwork, + integer *info) { /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + integer q_dim1, q_offset, i__1, i__2; + + /* 
Builtin functions */ + integer pow_ii(integer *, integer *); /* Local variables */ - static integer i__, j; - extern logical lsame_(char *, char *); + static integer i__, k, n1, n2, iq, iw, iz, ptr, ind1, ind2, indx, curr, + indxc, indxp; + extern /* Subroutine */ int claed8_(integer *, integer *, integer *, + complex *, integer *, real *, real *, integer *, real *, real *, + complex *, integer *, real *, integer *, integer *, integer *, + integer *, integer *, integer *, real *, integer *), slaed9_( + integer *, integer *, integer *, integer *, real *, real *, + integer *, real *, real *, real *, real *, integer *, integer *), + slaeda_(integer *, integer *, integer *, integer *, integer *, + integer *, integer *, integer *, real *, real *, integer *, real * + , real *, integer *); + static integer idlmda; + extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, + integer *, real *, integer *, complex *, integer *, real *), + xerbla_(char *, integer *), slamrg_(integer *, integer *, + real *, integer *, integer *, integer *); + static integer coltyp; /* - -- LAPACK auxiliary routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + September 30, 1994 Purpose ======= - DLACPY copies all or part of a two-dimensional matrix A to another - matrix B. + CLAED7 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and optionally eigenvectors of a dense or banded + Hermitian matrix that has been reduced to tridiagonal form. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. 
+ + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine SLAED2. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine SLAED4 (as called by SLAED3). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. Arguments ========= - UPLO (input) CHARACTER*1 - Specifies the part of the matrix A to be copied to B. - = 'U': Upper triangular part - = 'L': Lower triangular part - Otherwise: All of the matrix A + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. - M (input) INTEGER - The number of rows of the matrix A. M >= 0. + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. - N (input) INTEGER - The number of columns of the matrix A. N >= 0. + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N. - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The m by n matrix A. If UPLO = 'U', only the upper triangle - or trapezoid is accessed; if UPLO = 'L', only the lower - triangle or trapezoid is accessed. + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,M). 
+ CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= curlvl <= tlvls. - B (output) DOUBLE PRECISION array, dimension (LDB,N) - On exit, B = A in the locations specified by UPLO. + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). - LDB (input) INTEGER - The leading dimension of the array B. LDB >= max(1,M). + D (input/output) REAL array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. - ===================================================================== -*/ + Q (input/output) COMPLEX array, dimension (LDQ,N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; + RHO (input) REAL + Contains the subdiagonal element used to create the rank-1 + modification. - /* Function Body */ - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = min(j,*m); - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(uplo, "L")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = j; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; -/* L30: */ - } -/* L40: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; -/* L50: */ - } -/* L60: */ - } - } - return 0; - -/* End of DLACPY */ - -} /* dlacpy_ */ + INDXQ (output) INTEGER array, dimension (N) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, + ie. 
D( INDXQ( I = 1, N ) ) will be in ascending order. -/* Subroutine */ int dladiv_(doublereal *a, doublereal *b, doublereal *c__, - doublereal *d__, doublereal *p, doublereal *q) -{ - static doublereal e, f; + IWORK (workspace) INTEGER array, dimension (4*N) + RWORK (workspace) REAL array, + dimension (3*N+2*QSIZ*N) -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + WORK (workspace) COMPLEX array, dimension (QSIZ*N) + QSTORE (input/output) REAL array, dimension (N**2+1) + Stores eigenvectors of submatrices encountered during + divide and conquer, packed together. QPTR points to + beginning of the submatrices. - Purpose - ======= + QPTR (input/output) INTEGER array, dimension (N+2) + List of indices pointing to beginning of submatrices stored + in QSTORE. The submatrices are numbered starting at the + bottom left of the divide and conquer tree, from left to + right and bottom to top. - DLADIV performs complex division in real arithmetic + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and also the size of + the full, non-deflated problem. - a + i*b - p + i*q = --------- - c + i*d + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. - The algorithm is due to Robert L. Smith and can be found - in D. Knuth, The art of Computer Programming, Vol.2, p.195 + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. 
- Arguments - ========= + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. - A (input) DOUBLE PRECISION - B (input) DOUBLE PRECISION - C (input) DOUBLE PRECISION - D (input) DOUBLE PRECISION - The scalars a, b, c, and d in the above expression. + GIVNUM (input) REAL array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. - P (output) DOUBLE PRECISION - Q (output) DOUBLE PRECISION - The scalars p and q in the above expression. + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge ===================================================================== -*/ - if (abs(*d__) < abs(*c__)) { - e = *d__ / *c__; - f = *c__ + *d__ * e; - *p = (*a + *b * e) / f; - *q = (*b - *a * e) / f; - } else { - e = *c__ / *d__; - f = *d__ + *c__ * e; - *p = (*b + *a * e) / f; - *q = (-(*a) + *b * e) / f; - } - - return 0; - -/* End of DLADIV */ - -} /* dladiv_ */ - -/* Subroutine */ int dlae2_(doublereal *a, doublereal *b, doublereal *c__, - doublereal *rt1, doublereal *rt2) -{ - /* System generated locals */ - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); + Test the input parameters. +*/ - /* Local variables */ - static doublereal ab, df, tb, sm, rt, adf, acmn, acmx; + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --rwork; + --iwork; + /* Function Body */ + *info = 0; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix - [ A B ] - [ B C ]. - On return, RT1 is the eigenvalue of larger absolute value, and RT2 - is the eigenvalue of smaller absolute value. - - Arguments - ========= - - A (input) DOUBLE PRECISION - The (1,1) element of the 2-by-2 matrix. - - B (input) DOUBLE PRECISION - The (1,2) and (2,1) elements of the 2-by-2 matrix. - - C (input) DOUBLE PRECISION - The (2,2) element of the 2-by-2 matrix. - - RT1 (output) DOUBLE PRECISION - The eigenvalue of larger absolute value. - - RT2 (output) DOUBLE PRECISION - The eigenvalue of smaller absolute value. - - Further Details - =============== + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN +*/ + if (*n < 0) { + *info = -1; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -2; + } else if (*qsiz < *n) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLAED7", &i__1); + return 0; + } - RT1 is accurate to a few ulps barring over/underflow. +/* Quick return if possible */ - RT2 may be inaccurate if there is massive cancellation in the - determinant A*C-B*B; higher precision or correctly rounded or - correctly truncated arithmetic would be needed to compute RT2 - accurately in all cases. + if (*n == 0) { + return 0; + } - Overflow is possible only if RT1 is within a factor of 5 of overflow. - Underflow is harmless if the input data is 0 or exceeds - underflow_threshold / macheps. +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in SLAED2 and SLAED3. 
+*/ - ===================================================================== + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq = iw + *n; + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; - Compute the eigenvalues +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. */ - sm = *a + *c__; - df = *a - *c__; - adf = abs(df); - tb = *b + *b; - ab = abs(tb); - if (abs(*a) > abs(*c__)) { - acmx = *a; - acmn = *c__; - } else { - acmx = *c__; - acmn = *a; + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ } - if (adf > ab) { -/* Computing 2nd power */ - d__1 = ab / adf; - rt = adf * sqrt(d__1 * d__1 + 1.); - } else if (adf < ab) { -/* Computing 2nd power */ - d__1 = adf / ab; - rt = ab * sqrt(d__1 * d__1 + 1.); - } else { + curr = ptr + *curpbm; + slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &rwork[iz], &rwork[ + iz + *n], info); -/* Includes case AB=ADF=0 */ +/* + When solving the final problem, we no longer need the stored data, + so we will overwrite the data from this level onto the previously + used storage space. +*/ - rt = ab * sqrt(2.); + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; } - if (sm < 0.) { - *rt1 = (sm - rt) * .5; -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ +/* Sort and Deflate eigenvalues. */ - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else if (sm > 0.) 
{ - *rt1 = (sm + rt) * .5; + claed8_(&k, n, qsiz, &q[q_offset], ldq, &d__[1], rho, cutpnt, &rwork[iz], + &rwork[idlmda], &work[1], qsiz, &rwork[iw], &iwork[indxp], &iwork[ + indx], &indxq[1], &perm[prmptr[curr]], &givptr[curr + 1], &givcol[ + (givptr[curr] << 1) + 1], &givnum[(givptr[curr] << 1) + 1], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ +/* Solve Secular Equation. */ - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else { + if (k != 0) { + slaed9_(&k, &c__1, &k, n, &d__[1], &rwork[iq], &k, rho, &rwork[idlmda] + , &rwork[iw], &qstore[qptr[curr]], &k, info); + clacrm_(qsiz, &k, &work[1], qsiz, &qstore[qptr[curr]], &k, &q[ + q_offset], ldq, &rwork[iq]); +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + if (*info != 0) { + return 0; + } -/* Includes case RT1 = RT2 = 0 */ +/* Prepare the INDXQ sorting premutation. 
*/ - *rt1 = rt * .5; - *rt2 = rt * -.5; + n1 = k; + n2 = *n - k; + ind1 = 1; + ind2 = *n; + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } } + return 0; -/* End of DLAE2 */ +/* End of CLAED7 */ -} /* dlae2_ */ +} /* claed7_ */ -/* Subroutine */ int dlaed0_(integer *icompq, integer *qsiz, integer *n, - doublereal *d__, doublereal *e, doublereal *q, integer *ldq, - doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, - integer *info) +/* Subroutine */ int claed8_(integer *k, integer *n, integer *qsiz, complex * + q, integer *ldq, real *d__, real *rho, integer *cutpnt, real *z__, + real *dlamda, complex *q2, integer *ldq2, real *w, integer *indxp, + integer *indx, integer *indxq, integer *perm, integer *givptr, + integer *givcol, real *givnum, integer *info) { /* System generated locals */ - integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; - doublereal d__1; + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + real r__1; /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); + double sqrt(doublereal); /* Local variables */ - static integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; - static doublereal temp; - static integer curr; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer iperm; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer indxq, iwrem; - extern /* Subroutine */ int dlaed1_(integer *, doublereal *, doublereal *, - integer *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *); - static integer iqptr; - extern /* Subroutine */ int dlaed7_(integer *, integer *, integer *, - integer *, integer *, integer *, 
doublereal *, doublereal *, - integer *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, integer *, integer *, doublereal - *, doublereal *, integer *, integer *); - static integer tlvls; - extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *); - static integer igivcl; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - static integer igivnm, submat, curprb, subpbs, igivpt; - extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *); - static integer curlvl, matsiz, iprmpt, smlsiz; + static real c__; + static integer i__, j; + static real s, t; + static integer k2, n1, n2, jp, n1p1; + static real eps, tau, tol; + static integer jlam, imax, jmax; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + ccopy_(integer *, complex *, integer *, complex *, integer *), + csrot_(integer *, complex *, integer *, complex *, integer *, + real *, real *), scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), xerbla_(char *, + integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer + *, integer *, integer *); /* -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + Univ. 
of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 Purpose ======= - DLAED0 computes all eigenvalues and corresponding eigenvectors of a - symmetric tridiagonal matrix using the divide and conquer method. + CLAED8 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny element in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. Arguments ========= - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - = 2: Compute eigenvalues and eigenvectors of tridiagonal - matrix. - - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + K (output) INTEGER + Contains the number of non-deflated eigenvalues. + This is the order of the related secular equation. N (input) INTEGER The dimension of the symmetric tridiagonal matrix. N >= 0. - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the main diagonal of the tridiagonal matrix. - On exit, its eigenvalues. - - E (input) DOUBLE PRECISION array, dimension (N-1) - The off-diagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. + QSIZ (input) INTEGER + The dimension of the unitary matrix used to reduce + the dense or band matrix to tridiagonal form. + QSIZ >= N if ICOMPQ = 1. - Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) - On entry, Q must contain an N-by-N orthogonal matrix. - If ICOMPQ = 0 Q is not referenced. 
- If ICOMPQ = 1 On entry, Q is a subset of the columns of the - orthogonal matrix used to reduce the full - matrix to tridiagonal form corresponding to - the subset of the full matrix which is being - decomposed at this time. - If ICOMPQ = 2 On entry, Q will be the identity matrix. - On exit, Q contains the eigenvectors of the - tridiagonal matrix. + Q (input/output) COMPLEX array, dimension (LDQ,N) + On entry, Q contains the eigenvectors of the partially solved + system which has been previously updated in matrix + multiplies with other partially solved eigensystems. + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. LDQ (input) INTEGER - The leading dimension of the array Q. If eigenvectors are - desired, then LDQ >= max(1,N). In any case, LDQ >= 1. + The leading dimension of the array Q. LDQ >= max( 1, N ). - QSTORE (workspace) DOUBLE PRECISION array, dimension (LDQS, N) - Referenced only when ICOMPQ = 1. Used to store parts of - the eigenvector matrix when the updating matrix multiplies - take place. + D (input/output) REAL array, dimension (N) + On entry, D contains the eigenvalues of the two submatrices to + be combined. On exit, D contains the trailing (N-K) updated + eigenvalues (those which were deflated) sorted into increasing + order. - LDQS (input) INTEGER - The leading dimension of the array QSTORE. If ICOMPQ = 1, - then LDQS >= max(1,N). In any case, LDQS >= 1. + RHO (input/output) REAL + Contains the off diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. RHO is modified during the computation to + the value required by SLAED3. - WORK (workspace) DOUBLE PRECISION array, - If ICOMPQ = 0 or 1, the dimension of WORK must be at least - 1 + 3*N + 2*N*lg N + 2*N**2 - ( lg( N ) = smallest integer k - such that 2^k >= N ) - If ICOMPQ = 2, the dimension of WORK must be at least - 4*N + N**2. 
+ CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. MIN(1,N) <= CUTPNT <= N. - IWORK (workspace) INTEGER array, - If ICOMPQ = 0 or 1, the dimension of IWORK must be at least - 6 + 6*N + 5*N*lg N. - ( lg( N ) = smallest integer k - such that 2^k >= N ) - If ICOMPQ = 2, the dimension of IWORK must be at least - 3 + 5*N. + Z (input) REAL array, dimension (N) + On input this vector contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). The contents of Z are + destroyed during the updating process. + + DLAMDA (output) REAL array, dimension (N) + Contains a copy of the first K eigenvalues which will be used + by SLAED3 to form the secular equation. + + Q2 (output) COMPLEX array, dimension (LDQ2,N) + If ICOMPQ = 0, Q2 is not referenced. Otherwise, + Contains a copy of the first K eigenvectors which will be used + by SLAED7 in a matrix multiply (SGEMM) to update the new + eigenvectors. + + LDQ2 (input) INTEGER + The leading dimension of the array Q2. LDQ2 >= max( 1, N ). + + W (output) REAL array, dimension (N) + This will hold the first k values of the final + deflation-altered z-vector and will be passed to SLAED3. + + INDXP (workspace) INTEGER array, dimension (N) + This will contain the permutation used to place deflated + values of D at the end of the array. On output INDXP(1:K) + points to the nondeflated D-values and INDXP(K+1:N) + points to the deflated eigenvalues. + + INDX (workspace) INTEGER array, dimension (N) + This will contain the permutation used to sort the contents of + D into ascending order. + + INDXQ (input) INTEGER array, dimension (N) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that elements in + the second half of this permutation must first have CUTPNT + added to their values in order to be accurate. 
+ + PERM (output) INTEGER array, dimension (N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (output) INTEGER + Contains the number of Givens rotations which took place in + this subproblem. + + GIVCOL (output) INTEGER array, dimension (2, N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (output) REAL array, dimension (2, N) + Each number indicates the S value to be used in the + corresponding Givens rotation. INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA ===================================================================== @@ -8885,34 +9613,40 @@ L170: */ /* Parameter adjustments */ - --d__; - --e; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; - qstore_dim1 = *ldqs; - qstore_offset = 1 + qstore_dim1; - qstore -= qstore_offset; - --work; - --iwork; + --d__; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --indxp; + --indx; + --indxq; + --perm; + givcol -= 3; + givnum -= 3; /* Function Body */ *info = 0; - if (*icompq < 0 || *icompq > 2) { - *info = -1; - } else if (*icompq == 1 && *qsiz < max(0,*n)) { + if (*n < 0) { *info = -2; - } else if (*n < 0) { + } else if (*qsiz < *n) { *info = -3; } else if (*ldq < max(1,*n)) { - *info = -7; - } else if (*ldqs < max(1,*n)) { - *info = -9; + *info = -5; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -8; + } else if (*ldq2 < max(1,*n)) { + *info = -12; } if (*info != 0) { i__1 = -(*info); - xerbla_("DLAED0", &i__1); + xerbla_("CLAED8", &i__1); return 0; } @@ -8922,269 +9656,268 @@ L170: return 
0; } - smlsiz = ilaenv_(&c__9, "DLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; -/* - Determine the size and placement of the submatrices, and save in - the leading elements of IWORK. -*/ + if (*rho < 0.f) { + sscal_(&n2, &c_b1150, &z__[n1p1], &c__1); + } - iwork[1] = *n; - subpbs = 1; - tlvls = 0; -L10: - if (iwork[subpbs] > smlsiz) { - for (j = subpbs; j >= 1; --j) { - iwork[j * 2] = (iwork[j] + 1) / 2; - iwork[(j << 1) - 1] = iwork[j] / 2; +/* Normalize z so that norm(z) = 1 */ + + t = 1.f / sqrt(2.f); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + sscal_(n, &t, &z__[1], &c__1); + *rho = (r__1 = *rho * 2.f, dabs(r__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; /* L20: */ - } - ++tlvls; - subpbs <<= 1; - goto L10; } - i__1 = subpbs; - for (j = 2; j <= i__1; ++j) { - iwork[j] += iwork[j - 1]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; /* L30: */ } - -/* - Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 - using rank-1 modifications (cuts). 
-*/ - - spm1 = subpbs - 1; - i__1 = spm1; + i__ = 1; + j = *cutpnt + 1; + slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { - submat = iwork[i__] + 1; - smm1 = submat - 1; - d__[smm1] -= (d__1 = e[smm1], abs(d__1)); - d__[submat] -= (d__1 = e[smm1], abs(d__1)); + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; /* L40: */ } - indxq = (*n << 2) + 3; - if (*icompq != 2) { +/* Calculate the allowable deflation tolerance */ + + imax = isamax_(n, &z__[1], &c__1); + jmax = isamax_(n, &d__[1], &c__1); + eps = slamch_("Epsilon"); + tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1)); /* - Set up workspaces for eigenvalues only/accumulate new vectors - routine + If the rank-1 modifier is small enough, no more needs to be done + -- except to reorganize Q so that its columns correspond with the + elements in D. */ - temp = log((doublereal) (*n)) / log(2.); - lgn = (integer) temp; - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - iprmpt = indxq + *n + 1; - iperm = iprmpt + *n * lgn; - iqptr = iperm + *n * lgn; - igivpt = iqptr + *n + 2; - igivcl = igivpt + *n * lgn; - - igivnm = 1; - iq = igivnm + (*n << 1) * lgn; -/* Computing 2nd power */ + if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { + *k = 0; i__1 = *n; - iwrem = iq + i__1 * i__1 + 1; - -/* Initialize pointers */ - - i__1 = subpbs; - for (i__ = 0; i__ <= i__1; ++i__) { - iwork[iprmpt + i__] = 1; - iwork[igivpt + i__] = 1; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); /* L50: */ } - iwork[iqptr] = 1; + clacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + return 0; } /* - Solve each submatrix eigenproblem at the bottom of the divide and - conquer tree. + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. 
As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. */ - curr = 0; - i__1 = spm1; - for (i__ = 0; i__ <= i__1; ++i__) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[1]; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 1] - iwork[i__]; - } - if (*icompq == 2) { - dsteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + - submat * q_dim1], ldq, &work[1], info); - if (*info != 0) { - goto L130; + *k = 0; + *givptr = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L100; } } else { - dsteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + - iwork[iqptr + curr]], &matsiz, &work[1], info); - if (*info != 0) { - goto L130; - } - if (*icompq == 1) { - dgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b15, &q[submat * - q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]], - &matsiz, &c_b29, &qstore[submat * qstore_dim1 + 1], - ldqs); - } -/* Computing 2nd power */ - i__2 = matsiz; - iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; - ++curr; + jlam = j; + goto L70; } - k = 1; - i__2 = iwork[i__ + 1]; - for (j = submat; j <= i__2; ++j) { - iwork[indxq + j] = k; - ++k; /* L60: */ - } -/* L70: */ } +L70: + ++j; + if (j > *n) { + goto L90; + } + if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { -/* - Successively merge eigensystems of adjacent submatrices - into eigensystem for the corresponding larger matrix. +/* Deflate due to small z component. 
*/ - while ( SUBPBS > 1 ) -*/ + --k2; + indxp[k2] = j; + } else { - curlvl = 1; -L80: - if (subpbs > 1) { - spm2 = subpbs - 2; - i__1 = spm2; - for (i__ = 0; i__ <= i__1; i__ += 2) { - if (i__ == 0) { - submat = 1; - matsiz = iwork[2]; - msd2 = iwork[1]; - curprb = 0; - } else { - submat = iwork[i__] + 1; - matsiz = iwork[i__ + 2] - iwork[i__]; - msd2 = matsiz / 2; - ++curprb; - } +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; /* - Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) - into an eigensystem of size MATSIZ. - DLAED1 is used only for the full eigensystem of a tridiagonal - matrix. - DLAED7 handles the cases in which eigenvalues only or eigenvalues - and eigenvectors of a full symmetric matrix (which was reduced to - tridiagonal form) are desired. + Find sqrt(a**2+b**2) without overflow or + destructive underflow. */ - if (*icompq == 2) { - dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], - ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & - msd2, &work[1], &iwork[subpbs + 1], info); + tau = slapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { + +/* Deflation is possible. 
*/ + + z__[j] = tau; + z__[jlam] = 0.f; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + csrot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[ + indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L80: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L80; + } else { + indxp[k2 + i__ - 1] = jlam; + } } else { - dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ - submat], &qstore[submat * qstore_dim1 + 1], ldqs, & - iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & - work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] - , &iwork[igivpt], &iwork[igivcl], &work[igivnm], & - work[iwrem], &iwork[subpbs + 1], info); - } - if (*info != 0) { - goto L130; + indxp[k2 + i__ - 1] = jlam; } - iwork[i__ / 2 + 1] = iwork[i__ + 2]; -/* L90: */ + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; } - subpbs /= 2; - ++curlvl; - goto L80; } + goto L70; +L90: -/* - end while +/* Record the last eigenvalue. */ - Re-merge the eigenvalues/vectors which were deflated at the final - merge step. + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L100: + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. 
*/ - if (*icompq == 1) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - work[i__] = d__[j]; - dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 - + 1], &c__1); -/* L100: */ - } - dcopy_(n, &work[1], &c__1, &d__[1], &c__1); - } else if (*icompq == 2) { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - work[i__] = d__[j]; - dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], & + c__1); /* L110: */ - } - dcopy_(n, &work[1], &c__1, &d__[1], &c__1); - dlacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - j = iwork[indxq + i__]; - work[i__] = d__[j]; -/* L120: */ - } - dcopy_(n, &work[1], &c__1, &d__[1], &c__1); } - goto L140; -L130: - *info = submat * (*n + 1) + submat + matsiz - 1; +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. 
+*/ + + if (*k < *n) { + i__1 = *n - *k; + scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + clacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k + + 1) * q_dim1 + 1], ldq); + } -L140: return 0; -/* End of DLAED0 */ +/* End of CLAED8 */ -} /* dlaed0_ */ +} /* claed8_ */ -/* Subroutine */ int dlaed1_(integer *n, doublereal *d__, doublereal *q, - integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, - doublereal *work, integer *iwork, integer *info) +/* Subroutine */ int clahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * + info) { /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4, r__5, r__6; + complex q__1, q__2, q__3, q__4; + + /* Builtin functions */ + double r_imag(complex *); + void c_sqrt(complex *, complex *), r_cnjg(complex *, complex *); + double c_abs(complex *); /* Local variables */ - static integer i__, k, n1, n2, is, iw, iz, iq2, zpp1, indx, indxc; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer indxp; - extern /* Subroutine */ int dlaed2_(integer *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *, integer *, integer *, integer *), dlaed3_(integer *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, doublereal *, doublereal *, integer *, integer *, - doublereal *, doublereal *, integer *); - static integer idlmda; - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *); - static integer coltyp; + static integer i__, j, k, l, m; + static real 
s; + static complex t, u, v[2], x, y; + static integer i1, i2; + static complex t1; + static real t2; + static complex v2; + static real h10; + static complex h11; + static real h21; + static complex h22; + static integer nh, nz; + static complex h11s; + static integer itn, its; + static real ulp; + static complex sum; + static real tst1; + static complex temp; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), ccopy_(integer *, complex *, integer *, complex *, + integer *); + static real rtemp, rwork[1]; + extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, + integer *, complex *); + extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); + extern doublereal slamch_(char *), clanhs_(char *, integer *, + complex *, integer *, real *); + static real smlnum; /* - -- LAPACK routine (version 3.0) -- + -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 @@ -9193,2459 +9926,2618 @@ L140: Purpose ======= - DLAED1 computes the updated eigensystem of a diagonal - matrix after modification by a rank-one symmetric matrix. This - routine is used only for the eigenproblem which requires all - eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles - the case in which eigenvalues only or eigenvalues and eigenvectors - of a full symmetric matrix (which was reduced to tridiagonal form) - are desired. - - T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) - - where Z = Q'u, u is a vector of length N with ones in the - CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. - - The eigenvectors of the original matrix are stored in Q, and the - eigenvalues are in D. The algorithm consists of three stages: - - The first stage consists of deflating the size of the problem - when there are multiple eigenvalues or if there is a zero in - the Z vector. 
For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine DLAED2. - - The second stage consists of calculating the updated - eigenvalues. This is done by finding the roots of the secular - equation via the routine DLAED4 (as called by DLAED3). - This routine also calculates the eigenvectors of the current - problem. - - The final stage consists of computing the updated eigenvectors - directly using the updated eigenvalues. The eigenvectors for - the current problem are multiplied with the eigenvectors from - the overall problem. + CLAHQR is an auxiliary routine called by CHSEQR to update the + eigenvalues and Schur decomposition already computed by CHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to IHI. Arguments ========= - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the eigenvalues of the rank-1-perturbed matrix. - On exit, the eigenvalues of the repaired matrix. + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. - Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) - On entry, the eigenvectors of the rank-1-perturbed matrix. - On exit, the eigenvectors of the repaired tridiagonal matrix. + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). + N (input) INTEGER + The order of the matrix H. N >= 0. - INDXQ (input/output) INTEGER array, dimension (N) - On entry, the permutation which separately sorts the two - subproblems in D into ascending order. - On exit, the permutation which will reintegrate the - subproblems back into sorted order, - i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. 
+ ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows and + columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). + CLAHQR works primarily with the Hessenberg submatrix in rows + and columns ILO to IHI, but applies transformations to all of + H if WANTT is .TRUE.. + 1 <= ILO <= max(1,IHI); IHI <= N. - RHO (input) DOUBLE PRECISION - The subdiagonal entry used to create the rank-1 modification. + H (input/output) COMPLEX array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if WANTT is .TRUE., H is upper triangular in rows + and columns ILO:IHI, with any 2-by-2 diagonal blocks in + standard form. If WANTT is .FALSE., the contents of H are + unspecified on exit. - CUTPNT (input) INTEGER - The location of the last eigenvalue in the leading sub-matrix. - min(1,N) <= CUTPNT <= N/2. + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). - WORK (workspace) DOUBLE PRECISION array, dimension (4*N + N**2) + W (output) COMPLEX array, dimension (N) + The computed eigenvalues ILO to IHI are stored in the + corresponding elements of W. If WANTT is .TRUE., the + eigenvalues are stored in the same order as on the diagonal + of the Schur form returned in H, with W(i) = H(i,i). - IWORK (workspace) INTEGER array, dimension (4*N) + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge + Z (input/output) COMPLEX array, dimension (LDZ,N) + If WANTZ is .TRUE., on entry Z must contain the current + matrix Z of transformations accumulated by CHSEQR, and on + exit Z has been updated; transformations are applied only to + the submatrix Z(ILOZ:IHIZ,ILO:IHI). + If WANTZ is .FALSE., Z is not referenced. 
- Further Details - =============== + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= max(1,N). - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = i, CLAHQR failed to compute all the + eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1) + iterations; elements i+1:ihi of W contain those + eigenvalues which have been successfully computed. ===================================================================== - - - Test the input parameters. */ + /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --work; - --iwork; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; /* Function Body */ *info = 0; - if (*n < 0) { - *info = -1; - } else if (*ldq < max(1,*n)) { - *info = -4; - } else /* if(complicated condition) */ { -/* Computing MIN */ - i__1 = 1, i__2 = *n / 2; - if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { - *info = -7; - } - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED1", &i__1); - return 0; - } - /* Quick return if possible */ if (*n == 0) { return 0; } + if (*ilo == *ihi) { + i__1 = *ilo; + i__2 = *ilo + *ilo * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + return 0; + } + + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; /* - The following values are integer pointers which indicate - the portion of the workspace - used by a particular array in DLAED2 and DLAED3. + Set machine-dependent constants for the stopping criterion. + If norm(H) <= sqrt(OVFL), overflow should not occur. 
*/ - iz = 1; - idlmda = iz + *n; - iw = idlmda + *n; - iq2 = iw + *n; - - indx = 1; - indxc = indx + *n; - coltyp = indxc + *n; - indxp = coltyp + *n; - + ulp = slamch_("Precision"); + smlnum = slamch_("Safe minimum") / ulp; /* - Form the z-vector which consists of the last row of Q_1 and the - first row of Q_2. + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. */ - dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); - zpp1 = *cutpnt + 1; - i__1 = *n - *cutpnt; - dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); + if (*wantt) { + i1 = 1; + i2 = *n; + } -/* Deflate eigenvalues. */ +/* ITN is the total number of QR iterations allowed. */ - dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ - iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ - indxc], &iwork[indxp], &iwork[coltyp], info); + itn = nh * 30; - if (*info != 0) { - goto L20; - } +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of 1. Each iteration of the loop works + with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO, or + H(L,L-1) is negligible so that the matrix splits. +*/ -/* Solve Secular Equation. */ + i__ = *ihi; +L10: + if (i__ < *ilo) { + goto L130; + } - if (k != 0) { - is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + - 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; - dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], - &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ - is], info); - if (*info != 0) { - goto L20; +/* + Perform QR iterations on rows and columns ILO to I until a + submatrix of order 1 splits off at the bottom because a + subdiagonal element has become negligible. 
+*/ + + l = *ilo; + i__1 = itn; + for (its = 0; its <= i__1; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + i__3 = k - 1 + (k - 1) * h_dim1; + i__4 = k + k * h_dim1; + tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[k - + 1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__4] + .r, dabs(r__3)) + (r__4 = r_imag(&h__[k + k * h_dim1]), + dabs(r__4))); + if (tst1 == 0.f) { + i__3 = i__ - l + 1; + tst1 = clanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); + } + i__3 = k + (k - 1) * h_dim1; +/* Computing MAX */ + r__2 = ulp * tst1; + if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) { + goto L30; + } +/* L20: */ } +L30: + l = k; + if (l > *ilo) { -/* Prepare the INDXQ sorting permutation. */ +/* H(L,L-1) is negligible */ - n1 = k; - n2 = *n - k; - dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indxq[i__] = i__; -/* L10: */ + i__2 = l + (l - 1) * h_dim1; + h__[i__2].r = 0.f, h__[i__2].i = 0.f; + } + +/* Exit from loop if a submatrix of order 1 has split off. */ + + if (l >= i__) { + goto L120; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! (*wantt)) { + i1 = l; + i2 = i__; + } + + if (its == 10 || its == 20) { + +/* Exceptional shift. */ + + i__2 = i__ + (i__ - 1) * h_dim1; + s = (r__1 = h__[i__2].r, dabs(r__1)) * .75f; + i__2 = i__ + i__ * h_dim1; + q__1.r = s + h__[i__2].r, q__1.i = h__[i__2].i; + t.r = q__1.r, t.i = q__1.i; + } else { + +/* Wilkinson's shift. 
*/ + + i__2 = i__ + i__ * h_dim1; + t.r = h__[i__2].r, t.i = h__[i__2].i; + i__2 = i__ - 1 + i__ * h_dim1; + i__3 = i__ + (i__ - 1) * h_dim1; + r__1 = h__[i__3].r; + q__1.r = r__1 * h__[i__2].r, q__1.i = r__1 * h__[i__2].i; + u.r = q__1.r, u.i = q__1.i; + if (u.r != 0.f || u.i != 0.f) { + i__2 = i__ - 1 + (i__ - 1) * h_dim1; + q__2.r = h__[i__2].r - t.r, q__2.i = h__[i__2].i - t.i; + q__1.r = q__2.r * .5f, q__1.i = q__2.i * .5f; + x.r = q__1.r, x.i = q__1.i; + q__3.r = x.r * x.r - x.i * x.i, q__3.i = x.r * x.i + x.i * + x.r; + q__2.r = q__3.r + u.r, q__2.i = q__3.i + u.i; + c_sqrt(&q__1, &q__2); + y.r = q__1.r, y.i = q__1.i; + if (x.r * y.r + r_imag(&x) * r_imag(&y) < 0.f) { + q__1.r = -y.r, q__1.i = -y.i; + y.r = q__1.r, y.i = q__1.i; + } + q__3.r = x.r + y.r, q__3.i = x.i + y.i; + cladiv_(&q__2, &u, &q__3); + q__1.r = t.r - q__2.r, q__1.i = t.i - q__2.i; + t.r = q__1.r, t.i = q__1.i; + } + } + +/* Look for two consecutive small subdiagonal elements. */ + + i__2 = l + 1; + for (m = i__ - 1; m >= i__2; --m) { + +/* + Determine the effect of starting the single-shift QR + iteration at row M, and see if this would make H(M,M-1) + negligible. 
+*/ + + i__3 = m + m * h_dim1; + h11.r = h__[i__3].r, h11.i = h__[i__3].i; + i__3 = m + 1 + (m + 1) * h_dim1; + h22.r = h__[i__3].r, h22.i = h__[i__3].i; + q__1.r = h11.r - t.r, q__1.i = h11.i - t.i; + h11s.r = q__1.r, h11s.i = q__1.i; + i__3 = m + 1 + m * h_dim1; + h21 = h__[i__3].r; + s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs( + r__2)) + dabs(h21); + q__1.r = h11s.r / s, q__1.i = h11s.i / s; + h11s.r = q__1.r, h11s.i = q__1.i; + h21 /= s; + v[0].r = h11s.r, v[0].i = h11s.i; + v[1].r = h21, v[1].i = 0.f; + i__3 = m + (m - 1) * h_dim1; + h10 = h__[i__3].r; + tst1 = ((r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs( + r__2))) * ((r__3 = h11.r, dabs(r__3)) + (r__4 = r_imag(& + h11), dabs(r__4)) + ((r__5 = h22.r, dabs(r__5)) + (r__6 = + r_imag(&h22), dabs(r__6)))); + if ((r__1 = h10 * h21, dabs(r__1)) <= ulp * tst1) { + goto L50; + } +/* L40: */ + } + i__2 = l + l * h_dim1; + h11.r = h__[i__2].r, h11.i = h__[i__2].i; + i__2 = l + 1 + (l + 1) * h_dim1; + h22.r = h__[i__2].r, h22.i = h__[i__2].i; + q__1.r = h11.r - t.r, q__1.i = h11.i - t.i; + h11s.r = q__1.r, h11s.i = q__1.i; + i__2 = l + 1 + l * h_dim1; + h21 = h__[i__2].r; + s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs(r__2)) + + dabs(h21); + q__1.r = h11s.r / s, q__1.i = h11s.i / s; + h11s.r = q__1.r, h11s.i = q__1.i; + h21 /= s; + v[0].r = h11s.r, v[0].i = h11s.i; + v[1].r = h21, v[1].i = 0.f; +L50: + +/* Single-shift QR step */ + + i__2 = i__ - 1; + for (k = m; k <= i__2; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. + + V(2) is always real before the call to CLARFG, and hence + after the call T2 ( = T1*V(2) ) is also real. 
+*/ + + if (k > m) { + ccopy_(&c__2, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + clarfg_(&c__2, v, &v[1], &c__1, &t1); + if (k > m) { + i__3 = k + (k - 1) * h_dim1; + h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; + i__3 = k + 1 + (k - 1) * h_dim1; + h__[i__3].r = 0.f, h__[i__3].i = 0.f; + } + v2.r = v[1].r, v2.i = v[1].i; + q__1.r = t1.r * v2.r - t1.i * v2.i, q__1.i = t1.r * v2.i + t1.i * + v2.r; + t2 = q__1.r; + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__3 = i2; + for (j = k; j <= i__3; ++j) { + r_cnjg(&q__3, &t1); + i__4 = k + j * h_dim1; + q__2.r = q__3.r * h__[i__4].r - q__3.i * h__[i__4].i, q__2.i = + q__3.r * h__[i__4].i + q__3.i * h__[i__4].r; + i__5 = k + 1 + j * h_dim1; + q__4.r = t2 * h__[i__5].r, q__4.i = t2 * h__[i__5].i; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + sum.r = q__1.r, sum.i = q__1.i; + i__4 = k + j * h_dim1; + i__5 = k + j * h_dim1; + q__1.r = h__[i__5].r - sum.r, q__1.i = h__[i__5].i - sum.i; + h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; + i__4 = k + 1 + j * h_dim1; + i__5 = k + 1 + j * h_dim1; + q__2.r = sum.r * v2.r - sum.i * v2.i, q__2.i = sum.r * v2.i + + sum.i * v2.r; + q__1.r = h__[i__5].r - q__2.r, q__1.i = h__[i__5].i - q__2.i; + h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; +/* L60: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+2,I). 
+ + Computing MIN +*/ + i__4 = k + 2; + i__3 = min(i__4,i__); + for (j = i1; j <= i__3; ++j) { + i__4 = j + k * h_dim1; + q__2.r = t1.r * h__[i__4].r - t1.i * h__[i__4].i, q__2.i = + t1.r * h__[i__4].i + t1.i * h__[i__4].r; + i__5 = j + (k + 1) * h_dim1; + q__3.r = t2 * h__[i__5].r, q__3.i = t2 * h__[i__5].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + sum.r = q__1.r, sum.i = q__1.i; + i__4 = j + k * h_dim1; + i__5 = j + k * h_dim1; + q__1.r = h__[i__5].r - sum.r, q__1.i = h__[i__5].i - sum.i; + h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; + i__4 = j + (k + 1) * h_dim1; + i__5 = j + (k + 1) * h_dim1; + r_cnjg(&q__3, &v2); + q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r * + q__3.i + sum.i * q__3.r; + q__1.r = h__[i__5].r - q__2.r, q__1.i = h__[i__5].i - q__2.i; + h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; +/* L70: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + i__4 = j + k * z_dim1; + q__2.r = t1.r * z__[i__4].r - t1.i * z__[i__4].i, q__2.i = + t1.r * z__[i__4].i + t1.i * z__[i__4].r; + i__5 = j + (k + 1) * z_dim1; + q__3.r = t2 * z__[i__5].r, q__3.i = t2 * z__[i__5].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + sum.r = q__1.r, sum.i = q__1.i; + i__4 = j + k * z_dim1; + i__5 = j + k * z_dim1; + q__1.r = z__[i__5].r - sum.r, q__1.i = z__[i__5].i - + sum.i; + z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; + i__4 = j + (k + 1) * z_dim1; + i__5 = j + (k + 1) * z_dim1; + r_cnjg(&q__3, &v2); + q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r * + q__3.i + sum.i * q__3.r; + q__1.r = z__[i__5].r - q__2.r, q__1.i = z__[i__5].i - + q__2.i; + z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; +/* L80: */ + } + } + + if (k == m && m > l) { + +/* + If the QR step was started at row M > L because two + consecutive small subdiagonals were found, then extra + scaling must be performed to ensure that H(M,M-1) remains + real. 
+*/ + + q__1.r = 1.f - t1.r, q__1.i = 0.f - t1.i; + temp.r = q__1.r, temp.i = q__1.i; + r__1 = c_abs(&temp); + q__1.r = temp.r / r__1, q__1.i = temp.i / r__1; + temp.r = q__1.r, temp.i = q__1.i; + i__3 = m + 1 + m * h_dim1; + i__4 = m + 1 + m * h_dim1; + r_cnjg(&q__2, &temp); + q__1.r = h__[i__4].r * q__2.r - h__[i__4].i * q__2.i, q__1.i = + h__[i__4].r * q__2.i + h__[i__4].i * q__2.r; + h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; + if (m + 2 <= i__) { + i__3 = m + 2 + (m + 1) * h_dim1; + i__4 = m + 2 + (m + 1) * h_dim1; + q__1.r = h__[i__4].r * temp.r - h__[i__4].i * temp.i, + q__1.i = h__[i__4].r * temp.i + h__[i__4].i * + temp.r; + h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; + } + i__3 = i__; + for (j = m; j <= i__3; ++j) { + if (j != m + 1) { + if (i2 > j) { + i__4 = i2 - j; + cscal_(&i__4, &temp, &h__[j + (j + 1) * h_dim1], + ldh); + } + i__4 = j - i1; + r_cnjg(&q__1, &temp); + cscal_(&i__4, &q__1, &h__[i1 + j * h_dim1], &c__1); + if (*wantz) { + r_cnjg(&q__1, &temp); + cscal_(&nz, &q__1, &z__[*iloz + j * z_dim1], & + c__1); + } + } +/* L90: */ + } + } +/* L100: */ + } + +/* Ensure that H(I,I-1) is real. */ + + i__2 = i__ + (i__ - 1) * h_dim1; + temp.r = h__[i__2].r, temp.i = h__[i__2].i; + if (r_imag(&temp) != 0.f) { + rtemp = c_abs(&temp); + i__2 = i__ + (i__ - 1) * h_dim1; + h__[i__2].r = rtemp, h__[i__2].i = 0.f; + q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; + temp.r = q__1.r, temp.i = q__1.i; + if (i2 > i__) { + i__2 = i2 - i__; + r_cnjg(&q__1, &temp); + cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); + } + i__2 = i__ - i1; + cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); + if (*wantz) { + cscal_(&nz, &temp, &z__[*iloz + i__ * z_dim1], &c__1); + } } + +/* L110: */ } -L20: +/* Failure to converge in remaining number of iterations */ + + *info = i__; return 0; -/* End of DLAED1 */ +L120: -} /* dlaed1_ */ +/* H(I,I-1) is negligible: one eigenvalue has converged. 
*/ -/* Subroutine */ int dlaed2_(integer *k, integer *n, integer *n1, doublereal * - d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, - doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, - integer *indx, integer *indxc, integer *indxp, integer *coltyp, - integer *info) + i__1 = i__; + i__2 = i__ + i__ * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + +/* + Decrement number of remaining iterations, and return to start of + the main loop with new value of I. +*/ + + itn -= its; + i__ = l - 1; + goto L10; + +L130: + return 0; + +/* End of CLAHQR */ + +} /* clahqr_ */ + +/* Subroutine */ int clahrd_(integer *n, integer *k, integer *nb, complex *a, + integer *lda, complex *tau, complex *t, integer *ldt, complex *y, + integer *ldy) { /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - doublereal d__1, d__2, d__3, d__4; - - /* Builtin functions */ - double sqrt(doublereal); + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + complex q__1; /* Local variables */ - static doublereal c__; - static integer i__, j; - static doublereal s, t; - static integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; - static doublereal eps, tau, tol; - static integer psm[4], imax, jmax; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer ctot[4]; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dcopy_(integer *, doublereal *, integer *, doublereal - *, integer *); - - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); + static integer i__; + static complex ei; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + 
integer *), cgemv_(char *, integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, complex *, + integer *), ccopy_(integer *, complex *, integer *, + complex *, integer *), caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *), ctrmv_(char *, char *, char *, + integer *, complex *, integer *, complex *, integer *), clarfg_(integer *, complex *, complex *, integer + *, complex *), clacgv_(integer *, complex *, integer *); /* - -- LAPACK routine (version 3.0) -- + -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + June 30, 1999 Purpose ======= - DLAED2 merges the two sets of eigenvalues together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - eigenvalues are close together or if there is a tiny entry in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. + CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) + matrix A so that elements below the k-th subdiagonal are zero. The + reduction is performed by a unitary similarity transformation + Q' * A * Q. The routine returns the matrices V and T which determine + Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. + + This is an auxiliary routine called by CGEHRD. Arguments ========= - K (output) INTEGER - The number of non-deflated eigenvalues, and the order of the - related secular equation. 0 <= K <=N. + N (input) INTEGER + The order of the matrix A. - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. + K (input) INTEGER + The offset for the reduction. Elements below the k-th + subdiagonal in the first NB columns are reduced to zero. 
- N1 (input) INTEGER - The location of the last eigenvalue in the leading sub-matrix. - min(1,N) <= N1 <= N/2. + NB (input) INTEGER + The number of columns to be reduced. - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, D contains the eigenvalues of the two submatrices to - be combined. - On exit, D contains the trailing (N-K) updated eigenvalues - (those which were deflated) sorted into increasing order. + A (input/output) COMPLEX array, dimension (LDA,N-K+1) + On entry, the n-by-(n-k+1) general matrix A. + On exit, the elements on and above the k-th subdiagonal in + the first NB columns are overwritten with the corresponding + elements of the reduced matrix; the elements below the k-th + subdiagonal, with the array TAU, represent the matrix Q as a + product of elementary reflectors. The other columns of A are + unchanged. See Further Details. - Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) - On entry, Q contains the eigenvectors of two submatrices in - the two square blocks with corners at (1,1), (N1,N1) - and (N1+1, N1+1), (N,N). - On exit, Q contains the trailing (N-K) updated eigenvectors - (those which were deflated) in its last N-K columns. + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). + TAU (output) COMPLEX array, dimension (NB) + The scalar factors of the elementary reflectors. See Further + Details. - INDXQ (input/output) INTEGER array, dimension (N) - The permutation which separately sorts the two sub-problems - in D into ascending order. Note that elements in the second - half of this permutation must first have N1 added to their - values. Destroyed on exit. + T (output) COMPLEX array, dimension (LDT,NB) + The upper triangular matrix T. 
- RHO (input/output) DOUBLE PRECISION - On entry, the off-diagonal element associated with the rank-1 - cut which originally split the two submatrices which are now - being recombined. - On exit, RHO has been modified to the value required by - DLAED3. + LDT (input) INTEGER + The leading dimension of the array T. LDT >= NB. - Z (input) DOUBLE PRECISION array, dimension (N) - On entry, Z contains the updating vector (the last - row of the first sub-eigenvector matrix and the first row of - the second sub-eigenvector matrix). - On exit, the contents of Z have been destroyed by the updating - process. + Y (output) COMPLEX array, dimension (LDY,NB) + The n-by-nb matrix Y. - DLAMDA (output) DOUBLE PRECISION array, dimension (N) - A copy of the first K eigenvalues which will be used by - DLAED3 to form the secular equation. + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= max(1,N). - W (output) DOUBLE PRECISION array, dimension (N) - The first k values of the final deflation-altered z-vector - which will be passed to DLAED3. + Further Details + =============== - Q2 (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) - A copy of the first K eigenvectors which will be used by - DLAED3 in a matrix multiply (DGEMM) to solve for the new - eigenvectors. + The matrix Q is represented as a product of nb elementary reflectors - INDX (workspace) INTEGER array, dimension (N) - The permutation used to sort the contents of DLAMDA into - ascending order. + Q = H(1) H(2) . . . H(nb). - INDXC (output) INTEGER array, dimension (N) - The permutation used to arrange the columns of the deflated - Q matrix into three groups: the first group contains non-zero - elements only at and above N1, the second contains - non-zero elements only below N1, and the third is dense. + Each H(i) has the form - INDXP (workspace) INTEGER array, dimension (N) - The permutation used to place deflated values of D at the end - of the array. 
INDXP(1:K) points to the nondeflated D-values - and INDXP(K+1:N) points to the deflated eigenvalues. + H(i) = I - tau * v * v' - COLTYP (workspace/output) INTEGER array, dimension (N) - During execution, a label which will indicate which of the - following types a column in the Q2 matrix is: - 1 : non-zero in the upper half only; - 2 : dense; - 3 : non-zero in the lower half only; - 4 : deflated. - On exit, COLTYP(i) is the number of columns of type i, - for i=1 to 4 only. + where tau is a complex scalar, and v is a complex vector with + v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in + A(i+k+1:n,i), and tau in TAU(i). - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. + The elements of the vectors v together form the (n-k+1)-by-nb matrix + V which is needed, with T and Y, to apply the transformation to the + unreduced part of the matrix, using an update of the form: + A := (I - V*T*V') * (A - Y*V'). - Further Details - =============== + The contents of A on exit are illustrated by the following example + with n = 7, k = 3 and nb = 2: - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. + ( a h a a a ) + ( a h a a a ) + ( a h a a a ) + ( h h a a a ) + ( v1 h a a a ) + ( v1 v2 a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). ===================================================================== - Test the input parameters. 
+ Quick return if possible */ /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --z__; - --dlamda; - --w; - --q2; - --indx; - --indxc; - --indxp; - --coltyp; + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -2; - } else if (*ldq < max(1,*n)) { - *info = -6; - } else /* if(complicated condition) */ { -/* Computing MIN */ - i__1 = 1, i__2 = *n / 2; - if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { - *info = -3; - } - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED2", &i__1); + if (*n <= 1) { return 0; } -/* Quick return if possible */ + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { - if (*n == 0) { - return 0; - } +/* + Update A(1:n,i) - n2 = *n - *n1; - n1p1 = *n1 + 1; + Compute i-th column of A - Y * V' +*/ - if (*rho < 0.) { - dscal_(&n2, &c_b151, &z__[n1p1], &c__1); - } + i__2 = i__ - 1; + clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); + i__2 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &a[*k + + i__ - 1 + a_dim1], lda, &c_b56, &a[i__ * a_dim1 + 1], & + c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); /* - Normalize z so that norm(z) = 1. Since z is the concatenation of - two normalized vectors, norm2(z) = sqrt(2). -*/ + Apply I - V * T' * V' to this column (call it b) from the + left, using the last column of T as workspace - t = 1. 
/ sqrt(2.); - dscal_(n, &t, &z__[1], &c__1); + Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) + ( V2 ) ( b2 ) -/* RHO = ABS( norm(z)**2 * RHO ) */ + where V1 is unit lower triangular - *rho = (d__1 = *rho * 2., abs(d__1)); + w := V1' * b1 +*/ -/* Sort the eigenvalues into increasing order */ + i__2 = i__ - 1; + ccopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + ctrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a[*k + 1 + + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); - i__1 = *n; - for (i__ = n1p1; i__ <= i__1; ++i__) { - indxq[i__] += *n1; -/* L10: */ - } +/* w := w + V2'*b2 */ -/* re-integrate the deflated parts from the last pass */ + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b56, + &t[*nb * t_dim1 + 1], &c__1); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = d__[indxq[i__]]; -/* L20: */ - } - dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indx[i__] = indxq[indxc[i__]]; -/* L30: */ - } +/* w := T'*w */ -/* Calculate the allowable deflation tolerance */ + i__2 = i__ - 1; + ctrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); - imax = idamax_(n, &z__[1], &c__1); - jmax = idamax_(n, &d__[1], &c__1); - eps = EPSILON; -/* Computing MAX */ - d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2)) - ; - tol = eps * 8. * max(d__3,d__4); +/* b2 := b2 - V2*w */ -/* - If the rank-1 modifier is small enough, no more needs to be done - except to reorganize Q so that its columns correspond with the - elements in D. 
-*/ + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[*k + i__ + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1, &c_b56, &a[*k + i__ + + i__ * a_dim1], &c__1); - if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { - *k = 0; - iq2 = 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__ = indx[j]; - dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); - dlamda[j] = d__[i__]; - iq2 += *n; -/* L40: */ +/* b1 := b1 - V1*w */ + + i__2 = i__ - 1; + ctrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] + , lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + caxpy_(&i__2, &q__1, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ + * a_dim1], &c__1); + + i__2 = *k + i__ - 1 + (i__ - 1) * a_dim1; + a[i__2].r = ei.r, a[i__2].i = ei.i; } - dlacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); - dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); - goto L190; - } /* - If there are multiple eigenvalues then the problem deflates. Here - the number of equal eigenvalues are found. As each equal - eigenvalue is found, an elementary reflector is computed to rotate - the corresponding eigensubspace so that the corresponding - components of Z are zero in this new basis. 
+ Generate the elementary reflector H(i) to annihilate + A(k+i+1:n,i) */ - i__1 = *n1; - for (i__ = 1; i__ <= i__1; ++i__) { - coltyp[i__] = 1; -/* L50: */ - } - i__1 = *n; - for (i__ = n1p1; i__ <= i__1; ++i__) { - coltyp[i__] = 3; -/* L60: */ + i__2 = *k + i__ + i__ * a_dim1; + ei.r = a[i__2].r, ei.i = a[i__2].i; + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + clarfg_(&i__2, &ei, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[i__]) + ; + i__2 = *k + i__ + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute Y(1:n,i) */ + + i__2 = *n - *k - i__ + 1; + cgemv_("No transpose", n, &i__2, &c_b56, &a[(i__ + 1) * a_dim1 + 1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b55, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b55, &t[ + i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &t[i__ * + t_dim1 + 1], &c__1, &c_b56, &y[i__ * y_dim1 + 1], &c__1); + cscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); + +/* Compute T(1:i,i) */ + + i__2 = i__ - 1; + i__3 = i__; + q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; + cscal_(&i__2, &q__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + i__2 = i__ + i__ * t_dim1; + i__3 = i__; + t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + +/* L10: */ } + i__1 = *k + *nb + *nb * a_dim1; + a[i__1].r = ei.r, a[i__1].i = ei.i; + return 0; - *k = 0; - k2 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - nj = indx[j]; - if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { +/* End of CLAHRD */ -/* Deflate due to small z component. 
*/ +} /* clahrd_ */ - --k2; - coltyp[nj] = 4; - indxp[k2] = nj; - if (j == *n) { - goto L100; - } - } else { - pj = nj; - goto L80; - } -/* L70: */ - } -L80: - ++j; - nj = indx[j]; - if (j > *n) { - goto L100; - } - if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { - -/* Deflate due to small z component. */ +doublereal clange_(char *norm, integer *m, integer *n, complex *a, integer * + lda, real *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real ret_val, r__1, r__2; - --k2; - coltyp[nj] = 4; - indxp[k2] = nj; - } else { + /* Builtin functions */ + double c_abs(complex *), sqrt(doublereal); -/* Check if eigenvalues are close enough to allow deflation. */ + /* Local variables */ + static integer i__, j; + static real sum, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int classq_(integer *, complex *, integer *, real + *, real *); - s = z__[pj]; - c__ = z__[nj]; /* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. -*/ + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 - tau = dlapy2_(&c__, &s); - t = d__[nj] - d__[pj]; - c__ /= tau; - s = -s / tau; - if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { -/* Deflation is possible. 
*/ + Purpose + ======= - z__[nj] = tau; - z__[pj] = 0.; - if (coltyp[nj] != coltyp[pj]) { - coltyp[nj] = 2; - } - coltyp[pj] = 4; - drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & - c__, &s); -/* Computing 2nd power */ - d__1 = c__; -/* Computing 2nd power */ - d__2 = s; - t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); -/* Computing 2nd power */ - d__1 = s; -/* Computing 2nd power */ - d__2 = c__; - d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); - d__[pj] = t; - --k2; - i__ = 1; -L90: - if (k2 + i__ <= *n) { - if (d__[pj] < d__[indxp[k2 + i__]]) { - indxp[k2 + i__ - 1] = indxp[k2 + i__]; - indxp[k2 + i__] = pj; - ++i__; - goto L90; - } else { - indxp[k2 + i__ - 1] = pj; - } - } else { - indxp[k2 + i__ - 1] = pj; - } - pj = nj; - } else { - ++(*k); - dlamda[*k] = d__[pj]; - w[*k] = z__[pj]; - indxp[*k] = pj; - pj = nj; - } - } - goto L80; -L100: + CLANGE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + complex matrix A. -/* Record the last eigenvalue. */ + Description + =========== - ++(*k); - dlamda[*k] = d__[pj]; - w[*k] = z__[pj]; - indxp[*k] = pj; + CLANGE returns the value -/* - Count up the total number of the various types of columns, then - form a permutation which positions the four column types into - four uniform groups (although one or more of these groups may be - empty). -*/ + CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' - for (j = 1; j <= 4; ++j) { - ctot[j - 1] = 0; -/* L110: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - ct = coltyp[j]; - ++ctot[ct - 1]; -/* L120: */ - } + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). 
Note that max(abs(A(i,j))) is not a matrix norm. -/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + Arguments + ========= - psm[0] = 1; - psm[1] = ctot[0] + 1; - psm[2] = psm[1] + ctot[1]; - psm[3] = psm[2] + ctot[2]; - *k = *n - ctot[3]; + NORM (input) CHARACTER*1 + Specifies the value to be returned in CLANGE as described + above. -/* - Fill out the INDXC array so that the permutation which it induces - will place all type-1 columns first, all type-2 columns next, - then all type-3's, and finally all type-4's. -*/ + M (input) INTEGER + The number of rows of the matrix A. M >= 0. When M = 0, + CLANGE is set to zero. - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - js = indxp[j]; - ct = coltyp[js]; - indx[psm[ct - 1]] = js; - indxc[psm[ct - 1]] = j; - ++psm[ct - 1]; -/* L130: */ - } + N (input) INTEGER + The number of columns of the matrix A. N >= 0. When N = 0, + CLANGE is set to zero. -/* - Sort the eigenvalues and corresponding eigenvectors into DLAMDA - and Q2 respectively. The eigenvalues/vectors which were not - deflated go into the first K slots of DLAMDA and Q2 respectively, - while those which were deflated go into the last N - K slots. + A (input) COMPLEX array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(M,1). + + WORK (workspace) REAL array, dimension (LWORK), + where LWORK >= M when NORM = 'I'; otherwise, WORK is not + referenced. 
+ + ===================================================================== */ - i__ = 1; - iq1 = 1; - iq2 = (ctot[0] + ctot[1]) * *n1 + 1; - i__1 = ctot[0]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); - z__[i__] = d__[js]; - ++i__; - iq1 += *n1; -/* L140: */ - } - i__1 = ctot[1]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); - dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); - z__[i__] = d__[js]; - ++i__; - iq1 += *n1; - iq2 += n2; -/* L150: */ - } + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; - i__1 = ctot[2]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); - z__[i__] = d__[js]; - ++i__; - iq2 += n2; -/* L160: */ - } + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { - iq1 = iq2; - i__1 = ctot[3]; - for (j = 1; j <= i__1; ++j) { - js = indx[i__]; - dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); - iq2 += *n; - z__[i__] = d__[js]; - ++i__; -/* L170: */ - } +/* Find max(abs(A(i,j))). */ -/* - The deflated eigenvalues and their corresponding vectors go back - into the last N - K slots of D and Q respectively. -*/ + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); + value = dmax(r__1,r__2); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { - dlacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); - i__1 = *n - *k; - dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); +/* Find norm1(A). */ -/* Copy CTOT into COLTYP for referencing in DLAED3. 
*/ + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += c_abs(&a[i__ + j * a_dim1]); +/* L30: */ + } + value = dmax(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { - for (j = 1; j <= 4; ++j) { - coltyp[j] = ctot[j - 1]; -/* L180: */ +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += c_abs(&a[i__ + j * a_dim1]); +/* L60: */ + } +/* L70: */ + } + value = 0.f; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + classq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); } -L190: - return 0; + ret_val = value; + return ret_val; -/* End of DLAED2 */ +/* End of CLANGE */ -} /* dlaed2_ */ +} /* clange_ */ -/* Subroutine */ int dlaed3_(integer *k, integer *n, integer *n1, doublereal * - d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda, - doublereal *q2, integer *indx, integer *ctot, doublereal *w, - doublereal *s, integer *info) +doublereal clanhe_(char *norm, char *uplo, integer *n, complex *a, integer * + lda, real *work) { /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - doublereal d__1; + integer a_dim1, a_offset, i__1, i__2; + real ret_val, r__1, r__2, r__3; /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); + double c_abs(complex *), sqrt(doublereal); /* Local variables */ - static integer i__, j, n2, n12, ii, n23, iq2; - static doublereal temp; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int 
dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *), - dcopy_(integer *, doublereal *, integer *, doublereal *, integer - *), dlaed4_(integer *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, integer *); - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *), - dlaset_(char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *), xerbla_(char *, integer *); + static integer i__, j; + static real sum, absa, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int classq_(integer *, complex *, integer *, real + *, real *); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 Purpose ======= - DLAED3 finds the roots of the secular equation, as defined by the - values in D, W, and RHO, between 1 and K. It makes the - appropriate calls to DLAED4 and then updates the eigenvectors by - multiplying the matrix of eigenvectors of the pair of eigensystems - being combined by the matrix of eigenvectors of the K-by-K system - which is solved here. + CLANHE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + complex hermitian matrix A. - This code makes very mild assumptions about floating point - arithmetic. 
It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. + Description + =========== + + CLANHE returns the value + + CLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. Arguments ========= - K (input) INTEGER - The number of terms in the rational function to be solved by - DLAED4. K >= 0. + NORM (input) CHARACTER*1 + Specifies the value to be returned in CLANHE as described + above. + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + hermitian matrix A is to be referenced. + = 'U': Upper triangular part of A is referenced + = 'L': Lower triangular part of A is referenced N (input) INTEGER - The number of rows and columns in the Q matrix. - N >= K (deflation may result in N>K). + The order of the matrix A. N >= 0. When N = 0, CLANHE is + set to zero. - N1 (input) INTEGER - The location of the last eigenvalue in the leading submatrix. - min(1,N) <= N1 <= N/2. + A (input) COMPLEX array, dimension (LDA,N) + The hermitian matrix A. If UPLO = 'U', the leading n by n + upper triangular part of A contains the upper triangular part + of the matrix A, and the strictly lower triangular part of A + is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of A contains the lower triangular part of + the matrix A, and the strictly upper triangular part of A is + not referenced. 
Note that the imaginary parts of the diagonal + elements need not be set and are assumed to be zero. - D (output) DOUBLE PRECISION array, dimension (N) - D(I) contains the updated eigenvalues for - 1 <= I <= K. + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). - Q (output) DOUBLE PRECISION array, dimension (LDQ,N) - Initially the first K columns are used as workspace. - On output the columns 1 to K contain - the updated eigenvectors. + WORK (workspace) REAL array, dimension (LWORK), + where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, + WORK is not referenced. - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). + ===================================================================== +*/ - RHO (input) DOUBLE PRECISION - The value of the parameter in the rank one update equation. - RHO >= 0 required. - DLAMDA (input/output) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. May be changed on output by - having lowest order bit set to zero on Cray X-MP, Cray Y-MP, - Cray-2, or Cray C-90, as described above. + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; - Q2 (input) DOUBLE PRECISION array, dimension (LDQ2, N) - The first K columns of this matrix contain the non-deflated - eigenvectors for the split problem. + /* Function Body */ + if (*n == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { - INDX (input) INTEGER array, dimension (N) - The permutation used to arrange the columns of the deflated - Q matrix into three groups (see DLAED2). - The rows of the eigenvectors found by DLAED4 must be likewise - permuted before the matrix multiply can take place. +/* Find max(abs(A(i,j))). */ - CTOT (input) INTEGER array, dimension (4) - A count of the total number of the various types of columns - in Q, as described in INDX. 
The fourth column type is any - column which has been deflated. + value = 0.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); + value = dmax(r__1,r__2); +/* L10: */ + } +/* Computing MAX */ + i__2 = j + j * a_dim1; + r__2 = value, r__3 = (r__1 = a[i__2].r, dabs(r__1)); + value = dmax(r__2,r__3); +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j + j * a_dim1; + r__2 = value, r__3 = (r__1 = a[i__2].r, dabs(r__1)); + value = dmax(r__2,r__3); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); + value = dmax(r__1,r__2); +/* L30: */ + } +/* L40: */ + } + } + } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { - W (input/output) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the components - of the deflation-adjusted updating vector. Destroyed on - output. +/* Find normI(A) ( = norm1(A), since A is hermitian). */ - S (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K - Will contain the eigenvectors of the repaired matrix which - will be multiplied by the previously accumulated eigenvectors - to update the system. 
+ value = 0.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = c_abs(&a[i__ + j * a_dim1]); + sum += absa; + work[i__] += absa; +/* L50: */ + } + i__2 = j + j * a_dim1; + work[j] = sum + (r__1 = a[i__2].r, dabs(r__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + j * a_dim1; + sum = work[j] + (r__1 = a[i__2].r, dabs(r__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = c_abs(&a[i__ + j * a_dim1]); + sum += absa; + work[i__] += absa; +/* L90: */ + } + value = dmax(value,sum); +/* L100: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - LDS (input) INTEGER - The leading dimension of S. LDS >= max(1,K). +/* Find normF(A). */ - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. 
- > 0: if INFO = 1, an eigenvalue did not converge + scale = 0.f; + sum = 1.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + classq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + classq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + if (a[i__2].r != 0.f) { + i__2 = i__ + i__ * a_dim1; + absa = (r__1 = a[i__2].r, dabs(r__1)); + if (scale < absa) { +/* Computing 2nd power */ + r__1 = scale / absa; + sum = sum * (r__1 * r__1) + 1.f; + scale = absa; + } else { +/* Computing 2nd power */ + r__1 = absa / scale; + sum += r__1 * r__1; + } + } +/* L130: */ + } + value = scale * sqrt(sum); + } - Further Details - =============== + ret_val = value; + return ret_val; - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA - Modified by Francoise Tisseur, University of Tennessee. +/* End of CLANHE */ - ===================================================================== +} /* clanhe_ */ +doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * + work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + real ret_val, r__1, r__2; - Test the input parameters. 
-*/ + /* Builtin functions */ + double c_abs(complex *), sqrt(doublereal); - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --dlamda; - --q2; - --indx; - --ctot; - --w; - --s; + /* Local variables */ + static integer i__, j; + static real sum, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int classq_(integer *, complex *, integer *, real + *, real *); - /* Function Body */ - *info = 0; - if (*k < 0) { - *info = -1; - } else if (*n < *k) { - *info = -2; - } else if (*ldq < max(1,*n)) { - *info = -6; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED3", &i__1); - return 0; - } +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 -/* Quick return if possible */ - if (*k == 0) { - return 0; - } + Purpose + ======= -/* - Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can - be computed with high relative accuracy (barring over/underflow). - This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). - The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), - which on any of these machines zeros out the bottommost - bit of DLAMDA(I) if it is 1; this makes the subsequent - subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DLAMDA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DLAMDA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. 
+ CLANHS returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + Hessenberg matrix A. + + Description + =========== + + CLANHS returns the value + + CLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in CLANHS as described + above. + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, CLANHS is + set to zero. + + A (input) COMPLEX array, dimension (LDA,N) + The n by n upper Hessenberg matrix A; the part of A below the + first sub-diagonal is not referenced. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) REAL array, dimension (LWORK), + where LWORK >= N when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== */ - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; -/* L10: */ - } - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], - info); + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; -/* If the zero finder fails, the computation is terminated. */ + /* Function Body */ + if (*n == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { - if (*info != 0) { - goto L120; - } -/* L20: */ - } +/* Find max(abs(A(i,j))). 
*/ - if (*k == 1) { - goto L110; - } - if (*k == 2) { - i__1 = *k; + value = 0.f; + i__1 = *n; for (j = 1; j <= i__1; ++j) { - w[1] = q[j * q_dim1 + 1]; - w[2] = q[j * q_dim1 + 2]; - ii = indx[1]; - q[j * q_dim1 + 1] = w[ii]; - ii = indx[2]; - q[j * q_dim1 + 2] = w[ii]; -/* L30: */ +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); + value = dmax(r__1,r__2); +/* L10: */ + } +/* L20: */ } - goto L110; - } - -/* Compute updated W. */ - - dcopy_(k, &w[1], &c__1, &s[1], &c__1); + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { -/* Initialize W(I) = Q(I,I) */ +/* Find norm1(A). */ - i__1 = *ldq + 1; - dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + sum += c_abs(&a[i__ + j * a_dim1]); +/* L30: */ + } + value = dmax(value,sum); /* L40: */ } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; /* L50: */ } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += c_abs(&a[i__ + j * a_dim1]); /* L60: */ - } - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - d__1 = sqrt(-w[i__]); - w[i__] = d_sign(&d__1, &s[i__]); + } /* L70: */ - } - -/* Compute eigenvectors of the modified rank-1 modification. 
*/ - - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - s[i__] = w[i__] / q[i__ + j * q_dim1]; + } + value = 0.f; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); /* L80: */ } - temp = dnrm2_(k, &s[1], &c__1); - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - ii = indx[i__]; - q[i__ + j * q_dim1] = s[ii] / temp; + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + classq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); /* L90: */ } -/* L100: */ + value = scale * sqrt(sum); } -/* Compute the updated eigenvectors. */ + ret_val = value; + return ret_val; -L110: +/* End of CLANHS */ - n2 = *n - *n1; - n12 = ctot[1] + ctot[2]; - n23 = ctot[2] + ctot[3]; +} /* clanhs_ */ - dlacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); - iq2 = *n1 * n12 + 1; - if (n23 != 0) { - dgemm_("N", "N", &n2, k, &n23, &c_b15, &q2[iq2], &n2, &s[1], &n23, & - c_b29, &q[*n1 + 1 + q_dim1], ldq); - } else { - dlaset_("A", &n2, k, &c_b29, &c_b29, &q[*n1 + 1 + q_dim1], ldq); +/* Subroutine */ int clarcm_(integer *m, integer *n, real *a, integer *lda, + complex *b, integer *ldb, complex *c__, integer *ldc, real *rwork) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5; + real r__1; + complex q__1; + + /* Builtin functions */ + double r_imag(complex *); + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CLARCM performs a very simple matrix-matrix multiplication: + C := A * B, + where A is M by M and real; B is M by N and complex; + C is M by N and complex. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A and of the matrix C. + M >= 0. + + N (input) INTEGER + The number of columns and rows of the matrix B and + the number of columns of the matrix C. + N >= 0. + + A (input) REAL array, dimension (LDA, M) + A contains the M by M matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >=max(1,M). + + B (input) REAL array, dimension (LDB, N) + B contains the M by N matrix B. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >=max(1,M). + + C (input) COMPLEX array, dimension (LDC, N) + C contains the M by N matrix C. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >=max(1,M). + + RWORK (workspace) REAL array, dimension (2*M*N) + + ===================================================================== + + + Quick return if possible. 
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --rwork; + + /* Function Body */ + if (*m == 0 || *n == 0) { + return 0; } - dlacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); - if (n12 != 0) { - dgemm_("N", "N", n1, k, &n12, &c_b15, &q2[1], n1, &s[1], &n12, &c_b29, - &q[q_offset], ldq); - } else { - dlaset_("A", n1, k, &c_b29, &c_b29, &q[q_dim1 + 1], ldq); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * b_dim1; + rwork[(j - 1) * *m + i__] = b[i__3].r; +/* L10: */ + } +/* L20: */ } + l = *m * *n + 1; + sgemm_("N", "N", m, n, m, &c_b871, &a[a_offset], lda, &rwork[1], m, & + c_b1101, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = l + (j - 1) * *m + i__ - 1; + c__[i__3].r = rwork[i__4], c__[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + rwork[(j - 1) * *m + i__] = r_imag(&b[i__ + j * b_dim1]); +/* L50: */ + } +/* L60: */ + } + sgemm_("N", "N", m, n, m, &c_b871, &a[a_offset], lda, &rwork[1], m, & + c_b1101, &rwork[l], m); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + r__1 = c__[i__4].r; + i__5 = l + (j - 1) * *m + i__ - 1; + q__1.r = r__1, q__1.i = rwork[i__5]; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L70: */ + } +/* L80: */ + } -L120: return 0; -/* End of DLAED3 */ +/* End of CLARCM */ -} /* dlaed3_ */ +} /* clarcm_ */ -/* Subroutine */ int dlaed4_(integer *n, integer *i__, doublereal *d__, - doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, - integer *info) +/* Subroutine */ int clarf_(char *side, 
integer *m, integer *n, complex *v, + integer *incv, complex *tau, complex *c__, integer *ldc, complex * + work) { /* System generated locals */ - integer i__1; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); + integer c_dim1, c_offset; + complex q__1; /* Local variables */ - static doublereal a, b, c__; - static integer j; - static doublereal w; - static integer ii; - static doublereal dw, zz[3]; - static integer ip1; - static doublereal del, eta, phi, eps, tau, psi; - static integer iim1, iip1; - static doublereal dphi, dpsi; - static integer iter; - static doublereal temp, prew, temp1, dltlb, dltub, midpt; - static integer niter; - static logical swtch; - extern /* Subroutine */ int dlaed5_(integer *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *), dlaed6_(integer *, - logical *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *); - static logical swtch3; - - static logical orgati; - static doublereal erretm, rhoinv; + extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, integer *), + cgemv_(char *, integer *, integer *, complex *, complex *, + integer *, complex *, integer *, complex *, complex *, integer *); + extern logical lsame_(char *, char *); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - December 23, 1999 + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 Purpose ======= - This subroutine computes the I-th updated eigenvalue of a symmetric - rank-one modification to a diagonal matrix whose elements are - given in the array d, and that - - D(i) < D(j) for i < j + CLARF applies a complex elementary reflector H to a complex M-by-N + matrix C, from either the left or the right. H is represented in the + form - and that RHO > 0. This is arranged by the calling routine, and is - no loss in generality. The rank-one modified system is thus + H = I - tau * v * v' - diag( D ) + RHO * Z * Z_transpose. + where tau is a complex scalar and v is a complex vector. - where we assume the Euclidean norm of Z is 1. + If tau = 0, then H is taken to be the unit matrix. - The method consists of approximating the rational functions in the - secular equation by simpler interpolating rational functions. + To apply H' (the conjugate transpose of H), supply conjg(tau) instead + tau. Arguments ========= - N (input) INTEGER - The length of all arrays. + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H - I (input) INTEGER - The index of the eigenvalue to be computed. 1 <= I <= N. + M (input) INTEGER + The number of rows of the matrix C. - D (input) DOUBLE PRECISION array, dimension (N) - The original eigenvalues. It is assumed that they are in - order, D(I) < D(J) for I < J. + N (input) INTEGER + The number of columns of the matrix C. - Z (input) DOUBLE PRECISION array, dimension (N) - The components of the updating vector. + V (input) COMPLEX array, dimension + (1 + (M-1)*abs(INCV)) if SIDE = 'L' + or (1 + (N-1)*abs(INCV)) if SIDE = 'R' + The vector v in the representation of H. V is not used if + TAU = 0. - DELTA (output) DOUBLE PRECISION array, dimension (N) - If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th - component. If N = 1, then DELTA(1) = 1. 
The vector DELTA - contains the information necessary to construct the - eigenvectors. + INCV (input) INTEGER + The increment between elements of v. INCV <> 0. - RHO (input) DOUBLE PRECISION - The scalar in the symmetric updating formula. + TAU (input) COMPLEX + The value tau in the representation of H. - DLAM (output) DOUBLE PRECISION - The computed lambda_I, the I-th updated eigenvalue. + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = 1, the updating process failed. + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). - Internal Parameters - =================== + WORK (workspace) COMPLEX array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' - Logical variable ORGATI (origin-at-i?) is used for distinguishing - whether D(i) or D(i+1) is treated as the origin. + ===================================================================== +*/ - ORGATI = .true. origin at i - ORGATI = .false. origin at i+1 - Logical variable SWTCH3 (switch-for-3-poles?) is for noting - if we are working with THREE poles! + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; - MAXIT is the maximum number of iterations allowed for each - eigenvalue. + /* Function Body */ + if (lsame_(side, "L")) { - Further Details - =============== +/* Form H * C */ - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA + if (tau->r != 0.f || tau->i != 0.f) { - ===================================================================== +/* w := C' * v */ + cgemv_("Conjugate transpose", m, n, &c_b56, &c__[c_offset], ldc, & + v[1], incv, &c_b55, &work[1], &c__1); - Since this routine is called in an inner loop, we do no argument - checking. 
+/* C := C - v * w' */ - Quick return for N=1 and 2. -*/ + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(m, n, &q__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], + ldc); + } + } else { - /* Parameter adjustments */ - --delta; - --z__; - --d__; +/* Form C * H */ - /* Function Body */ - *info = 0; - if (*n == 1) { + if (tau->r != 0.f || tau->i != 0.f) { -/* Presumably, I=1 upon entry */ +/* w := C * v */ - *dlam = d__[1] + *rho * z__[1] * z__[1]; - delta[1] = 1.; - return 0; - } - if (*n == 2) { - dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); - return 0; - } + cgemv_("No transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1], + incv, &c_b55, &work[1], &c__1); -/* Compute machine epsilon */ +/* C := C - w * v' */ - eps = EPSILON; - rhoinv = 1. / *rho; + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(m, n, &q__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], + ldc); + } + } + return 0; -/* The case I = N */ +/* End of CLARF */ - if (*i__ == *n) { +} /* clarf_ */ -/* Initialize some basic variables */ +/* Subroutine */ int clarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, complex *v, integer *ldv, + complex *t, integer *ldt, complex *c__, integer *ldc, complex *work, + integer *ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2; - ii = *n - 1; - niter = 1; + /* Builtin functions */ + void r_cnjg(complex *, complex *); -/* Calculate initial guess */ + /* Local variables */ + static integer i__, j; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *), ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex 
*, integer *, complex *, + integer *), clacgv_(integer *, + complex *, integer *); + static char transt[1]; - midpt = *rho / 2.; /* - If ||Z||_2 is not one, then TEMP should be set to - RHO * ||Z||_2^2 / TWO -*/ - - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - midpt; -/* L10: */ - } + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 - psi = 0.; - i__1 = *n - 2; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / delta[j]; -/* L20: */ - } - c__ = rhoinv + psi; - w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* - n]; + Purpose + ======= - if (w <= 0.) { - temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) - + z__[*n] * z__[*n] / *rho; - if (c__ <= temp) { - tau = *rho; - } else { - del = d__[*n] - d__[*n - 1]; - a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] - ; - b = z__[*n] * z__[*n] * del; - if (a < 0.) { - tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); - } - } + CLARFB applies a complex block reflector H or its transpose H' to a + complex M-by-N matrix C, from either the left or the right. -/* - It can be proved that - D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO -*/ + Arguments + ========= - dltlb = midpt; - dltub = *rho; - } else { - del = d__[*n] - d__[*n - 1]; - a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; - b = z__[*n] * z__[*n] * del; - if (a < 0.) { - tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); - } else { - tau = (a + sqrt(a * a + b * 4. 
* c__)) / (c__ * 2.); - } + SIDE (input) CHARACTER*1 + = 'L': apply H or H' from the Left + = 'R': apply H or H' from the Right -/* - It can be proved that - D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 -*/ + TRANS (input) CHARACTER*1 + = 'N': apply H (No transpose) + = 'C': apply H' (Conjugate transpose) - dltlb = 0.; - dltub = midpt; - } + DIRECT (input) CHARACTER*1 + Indicates how H is formed from a product of elementary + reflectors + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - tau; -/* L30: */ - } + STOREV (input) CHARACTER*1 + Indicates how the vectors which define the elementary + reflectors are stored: + = 'C': Columnwise + = 'R': Rowwise -/* Evaluate PSI and the derivative DPSI */ + M (input) INTEGER + The number of rows of the matrix C. - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L40: */ - } - erretm = abs(erretm); + N (input) INTEGER + The number of columns of the matrix C. -/* Evaluate PHI and the derivative DPHI */ + K (input) INTEGER + The order of the matrix T (= the number of elementary + reflectors whose product defines the block reflector). - temp = z__[*n] / delta[*n]; - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi - + dphi); + V (input) COMPLEX array, dimension + (LDV,K) if STOREV = 'C' + (LDV,M) if STOREV = 'R' and SIDE = 'L' + (LDV,N) if STOREV = 'R' and SIDE = 'R' + The matrix V. See further details. - w = rhoinv + phi + psi; + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); + if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); + if STOREV = 'R', LDV >= K. 
-/* Test for convergence */ + T (input) COMPLEX array, dimension (LDT,K) + The triangular K-by-K matrix T in the representation of the + block reflector. - if (abs(w) <= eps * erretm) { - *dlam = d__[*i__] + tau; - goto L250; - } + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. - if (w <= 0.) { - dltlb = max(dltlb,tau); - } else { - dltub = min(dltub,tau); - } + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by H*C or H'*C or C*H or C*H'. -/* Calculate the new step */ + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX array, dimension (LDWORK,K) + + LDWORK (input) INTEGER + The leading dimension of the array WORK. + If SIDE = 'L', LDWORK >= max(1,N); + if SIDE = 'R', LDWORK >= max(1,M). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (lsame_(trans, "N")) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (lsame_(storev, "C")) { + + if (lsame_(direct, "F")) { - ++niter; - c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; - a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( - dpsi + dphi); - b = delta[*n - 1] * delta[*n] * w; - if (c__ < 0.) { - c__ = abs(c__); - } - if (c__ == 0.) { /* - ETA = B/A - ETA = RHO - TAU + Let V = ( V1 ) (first K rows) + ( V2 ) + where V1 is unit lower triangular. */ - eta = dltub - tau; - } else if (a >= 0.) { - eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ - * 2.); - } else { - eta = b * 2. 
/ (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) - ); - } + + if (lsame_(side, "L")) { /* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C1' */ - if (w * eta > 0.) { - eta = -w / (dpsi + dphi); - } - temp = tau + eta; - if (temp > dltub || temp < dltlb) { - if (w < 0.) { - eta = (dltub - tau) / 2.; - } else { - eta = (dltlb - tau) / 2.; - } - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L50: */ - } + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); + clacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L10: */ + } - tau += eta; +/* W := W * V1 */ -/* Evaluate PSI and the derivative DPSI */ + ctrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b56, + &v[v_offset], ldv, &work[work_offset], ldwork); + if (*m > *k) { - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L60: */ - } - erretm = abs(erretm); +/* W := W + C2'*V2 */ -/* Evaluate PHI and the derivative DPHI */ + i__1 = *m - *k; + cgemm_("Conjugate transpose", "No transpose", n, k, &i__1, + &c_b56, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + + v_dim1], ldv, &c_b56, &work[work_offset], ldwork); + } - temp = z__[*n] / delta[*n]; - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. 
+ erretm - phi + rhoinv + abs(tau) * (dpsi - + dphi); +/* W := W * T' or W * T */ - w = rhoinv + phi + psi; + ctrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); -/* Main loop to update the values of the array DELTA */ +/* C := C - V * W' */ - iter = niter + 1; + if (*m > *k) { - for (niter = iter; niter <= 30; ++niter) { +/* C2 := C2 - V2 * W' */ -/* Test for convergence */ + i__1 = *m - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &i__1, n, k, + &q__1, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b56, &c__[*k + 1 + + c_dim1], ldc); + } - if (abs(w) <= eps * erretm) { - *dlam = d__[*i__] + tau; - goto L250; - } +/* W := W * V1' */ - if (w <= 0.) { - dltlb = max(dltlb,tau); - } else { - dltub = min(dltub,tau); - } + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, + &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); -/* Calculate the new step */ +/* C1 := C1 - W' */ - c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; - a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * - (dpsi + dphi); - b = delta[*n - 1] * delta[*n] * w; - if (a >= 0.) { - eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } else { - eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); - } + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * c_dim1; + i__4 = j + i__ * c_dim1; + r_cnjg(&q__2, &work[i__ + j * work_dim1]); + q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - + q__2.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L20: */ + } +/* L30: */ + } + + } else if (lsame_(side, "R")) { /* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. 
-*/ + Form C * H or C * H' where C = ( C1 C2 ) - if (w * eta > 0.) { - eta = -w / (dpsi + dphi); - } - temp = tau + eta; - if (temp > dltub || temp < dltlb) { - if (w < 0.) { - eta = (dltub - tau) / 2.; - } else { - eta = (dltlb - tau) / 2.; - } - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L70: */ - } + W := C * V = (C1*V1 + C2*V2) (stored in WORK) - tau += eta; + W := C1 +*/ -/* Evaluate PSI and the derivative DPSI */ + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = ii; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L80: */ - } - erretm = abs(erretm); +/* W := W * V1 */ -/* Evaluate PHI and the derivative DPHI */ + ctrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b56, + &v[v_offset], ldv, &work[work_offset], ldwork); + if (*n > *k) { - temp = z__[*n] / delta[*n]; - phi = z__[*n] * temp; - dphi = temp * temp; - erretm = (-phi - psi) * 8. 
+ erretm - phi + rhoinv + abs(tau) * ( - dpsi + dphi); +/* W := W + C2 * V2 */ - w = rhoinv + phi + psi; -/* L90: */ - } + i__1 = *n - *k; + cgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b56, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b56, &work[work_offset], + ldwork); + } -/* Return with INFO = 1, NITER = MAXIT and not converged */ +/* W := W * T or W * T' */ - *info = 1; - *dlam = d__[*i__] + tau; - goto L250; + ctrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); -/* End for the case I = N */ +/* C := C - W * V' */ - } else { + if (*n > *k) { -/* The case for I < N */ +/* C2 := C2 - W * V2' */ - niter = 1; - ip1 = *i__ + 1; + i__1 = *n - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", m, &i__1, k, + &q__1, &work[work_offset], ldwork, &v[*k + 1 + + v_dim1], ldv, &c_b56, &c__[(*k + 1) * c_dim1 + 1], + ldc); + } -/* Calculate initial guess */ +/* W := W * V1' */ - del = d__[ip1] - d__[*i__]; - midpt = del / 2.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - midpt; -/* L100: */ - } + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, + &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); - psi = 0.; - i__1 = *i__ - 1; - for (j = 1; j <= i__1; ++j) { - psi += z__[j] * z__[j] / delta[j]; -/* L110: */ - } +/* C1 := C1 - W */ - phi = 0.; - i__1 = *i__ + 2; - for (j = *n; j >= i__1; --j) { - phi += z__[j] * z__[j] / delta[j]; -/* L120: */ - } - c__ = rhoinv + psi + phi; - w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / - delta[ip1]; + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * work_dim1; + q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L50: */ + } +/* L60: */ + } + } - if (w > 
0.) { + } else { /* - d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 - - We choose d(i) as origin. + Let V = ( V1 ) + ( V2 ) (last K rows) + where V2 is unit upper triangular. */ - orgati = TRUE_; - a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; - b = z__[*i__] * z__[*i__] * del; - if (a > 0.) { - tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); - } else { - tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } - dltlb = 0.; - dltub = midpt; - } else { + if (lsame_(side, "L")) { /* - (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) + Form H * C or H' * C where C = ( C1 ) + ( C2 ) - We choose d(i+1) as origin. + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C2' */ - orgati = FALSE_; - a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; - b = z__[ip1] * z__[ip1] * del; - if (a < 0.) { - tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( - d__1)))); - } else { - tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / - (c__ * 2.); - } - dltlb = -midpt; - dltub = 0.; - } + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); + clacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L70: */ + } - if (orgati) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[*i__] - tau; -/* L130: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] = d__[j] - d__[ip1] - tau; -/* L140: */ - } - } - if (orgati) { - ii = *i__; - } else { - ii = *i__ + 1; - } - iim1 = ii - 1; - iip1 = ii + 1; +/* W := W * V2 */ -/* Evaluate PSI and the derivative DPSI */ + ctrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b56, + &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + if (*m > *k) { - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L150: */ - } - 
erretm = abs(erretm); +/* W := W + C1'*V1 */ -/* Evaluate PHI and the derivative DPHI */ + i__1 = *m - *k; + cgemm_("Conjugate transpose", "No transpose", n, k, &i__1, + &c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b56, &work[work_offset], ldwork); + } - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / delta[j]; - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L160: */ - } +/* W := W * T' or W * T */ - w = rhoinv + phi + psi; + ctrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); -/* - W is the value of the secular function with - its ii-th element removed. -*/ +/* C := C - V * W' */ - swtch3 = FALSE_; - if (orgati) { - if (w < 0.) { - swtch3 = TRUE_; - } - } else { - if (w > 0.) { - swtch3 = TRUE_; - } - } - if (ii == 1 || ii == *n) { - swtch3 = FALSE_; - } + if (*m > *k) { - temp = z__[ii] / delta[ii]; - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w += temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + - abs(tau) * dw; +/* C1 := C1 - V1 * W' */ -/* Test for convergence */ + i__1 = *m - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &i__1, n, k, + &q__1, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b56, &c__[c_offset], ldc); + } - if (abs(w) <= eps * erretm) { - if (orgati) { - *dlam = d__[*i__] + tau; - } else { - *dlam = d__[ip1] + tau; - } - goto L250; - } +/* W := W * V2' */ - if (w <= 0.) { - dltlb = max(dltlb,tau); - } else { - dltub = min(dltub,tau); - } + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, + &c_b56, &v[*m - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); -/* Calculate the new step */ +/* C2 := C2 - W' */ - ++niter; - if (! 
swtch3) { - if (orgati) { -/* Computing 2nd power */ - d__1 = z__[*i__] / delta[*i__]; - c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * - d__1); - } else { -/* Computing 2nd power */ - d__1 = z__[ip1] / delta[ip1]; - c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * - d__1); - } - a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * - dw; - b = delta[*i__] * delta[ip1] * w; - if (c__ == 0.) { - if (a == 0.) { - if (orgati) { - a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * - (dpsi + dphi); - } else { - a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * - (dpsi + dphi); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *m - *k + j + i__ * c_dim1; + i__4 = *m - *k + j + i__ * c_dim1; + r_cnjg(&q__2, &work[i__ + j * work_dim1]); + q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - + q__2.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L80: */ } +/* L90: */ } - eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. 
* c__, abs( - d__1)))); - } - } else { - -/* Interpolation using THREE most relevant poles */ - temp = rhoinv + psi + phi; - if (orgati) { - temp1 = z__[iim1] / delta[iim1]; - temp1 *= temp1; - c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ - iip1]) * temp1; - zz[0] = z__[iim1] * z__[iim1]; - zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); - } else { - temp1 = z__[iip1] / delta[iip1]; - temp1 *= temp1; - c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[ - iim1]) * temp1; - zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); - zz[2] = z__[iip1] * z__[iip1]; - } - zz[1] = z__[ii] * z__[ii]; - dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); - if (*info != 0) { - goto L250; - } - } + } else if (lsame_(side, "R")) { /* - Note, eta should be positive if w is negative, and - eta should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. -*/ - - if (w * eta >= 0.) { - eta = -w / dw; - } - temp = tau + eta; - if (temp > dltub || temp < dltlb) { - if (w < 0.) 
{ - eta = (dltub - tau) / 2.; - } else { - eta = (dltlb - tau) / 2.; - } - } - - prew = w; + Form C * H or C * H' where C = ( C1 C2 ) -/* L170: */ - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L180: */ - } + W := C * V = (C1*V1 + C2*V2) (stored in WORK) -/* Evaluate PSI and the derivative DPSI */ + W := C2 +*/ - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L190: */ - } - erretm = abs(erretm); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L100: */ + } -/* Evaluate PHI and the derivative DPHI */ +/* W := W * V2 */ - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / delta[j]; - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L200: */ - } + ctrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b56, + &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + if (*n > *k) { - temp = z__[ii] / delta[ii]; - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + ( - d__1 = tau + eta, abs(d__1)) * dw; +/* W := W + C1 * V1 */ - swtch = FALSE_; - if (orgati) { - if (-w > abs(prew) / 10.) { - swtch = TRUE_; - } - } else { - if (w > abs(prew) / 10.) 
{ - swtch = TRUE_; - } - } + i__1 = *n - *k; + cgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b56, &work[work_offset], ldwork); + } - tau += eta; +/* W := W * T or W * T' */ -/* Main loop to update the values of the array DELTA */ + ctrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); - iter = niter + 1; +/* C := C - W * V' */ - for (niter = iter; niter <= 30; ++niter) { + if (*n > *k) { -/* Test for convergence */ +/* C1 := C1 - W * V1' */ - if (abs(w) <= eps * erretm) { - if (orgati) { - *dlam = d__[*i__] + tau; - } else { - *dlam = d__[ip1] + tau; + i__1 = *n - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", m, &i__1, k, + &q__1, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b56, &c__[c_offset], ldc); } - goto L250; - } - - if (w <= 0.) { - dltlb = max(dltlb,tau); - } else { - dltub = min(dltub,tau); - } -/* Calculate the new step */ +/* W := W * V2' */ - if (! swtch3) { - if (! swtch) { - if (orgati) { -/* Computing 2nd power */ - d__1 = z__[*i__] / delta[*i__]; - c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( - d__1 * d__1); - } else { -/* Computing 2nd power */ - d__1 = z__[ip1] / delta[ip1]; - c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * - (d__1 * d__1); - } - } else { - temp = z__[ii] / delta[ii]; - if (orgati) { - dpsi += temp * temp; - } else { - dphi += temp * temp; - } - c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; - } - a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] - * dw; - b = delta[*i__] * delta[ip1] * w; - if (c__ == 0.) { - if (a == 0.) { - if (! 
swtch) { - if (orgati) { - a = z__[*i__] * z__[*i__] + delta[ip1] * - delta[ip1] * (dpsi + dphi); - } else { - a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ - *i__] * (dpsi + dphi); - } - } else { - a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] - * delta[ip1] * dphi; - } - } - eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) - / (c__ * 2.); - } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, - abs(d__1)))); - } - } else { + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, + &c_b56, &v[*n - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); -/* Interpolation using THREE most relevant poles */ +/* C2 := C2 - W */ - temp = rhoinv + psi + phi; - if (swtch) { - c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; - zz[0] = delta[iim1] * delta[iim1] * dpsi; - zz[2] = delta[iip1] * delta[iip1] * dphi; - } else { - if (orgati) { - temp1 = z__[iim1] / delta[iim1]; - temp1 *= temp1; - c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - - d__[iip1]) * temp1; - zz[0] = z__[iim1] * z__[iim1]; - zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + - dphi); - } else { - temp1 = z__[iip1] / delta[iip1]; - temp1 *= temp1; - c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - - d__[iim1]) * temp1; - zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - - temp1)); - zz[2] = z__[iip1] * z__[iip1]; + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (*n - *k + j) * c_dim1; + i__4 = i__ + (*n - *k + j) * c_dim1; + i__5 = i__ + j * work_dim1; + q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L110: */ } - } - dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, - info); - if (*info != 0) { - goto L250; +/* L120: */ } } + } + + } else if (lsame_(storev, "R")) { + + if (lsame_(direct, "F")) { /* - Note, eta should be positive if w is negative, and - eta 
should be negative otherwise. However, - if for some reason caused by roundoff, eta*w > 0, - we simply use one Newton step instead. This way - will guarantee eta*w < 0. + Let V = ( V1 V2 ) (V1: first K columns) + where V1 is unit upper triangular. */ - if (w * eta >= 0.) { - eta = -w / dw; - } - temp = tau + eta; - if (temp > dltub || temp < dltlb) { - if (w < 0.) { - eta = (dltub - tau) / 2.; - } else { - eta = (dltlb - tau) / 2.; - } - } + if (lsame_(side, "L")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - delta[j] -= eta; -/* L210: */ - } +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) - tau += eta; - prew = w; + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) -/* Evaluate PSI and the derivative DPSI */ + W := C1' +*/ - dpsi = 0.; - psi = 0.; - erretm = 0.; - i__1 = iim1; - for (j = 1; j <= i__1; ++j) { - temp = z__[j] / delta[j]; - psi += z__[j] * temp; - dpsi += temp * temp; - erretm += psi; -/* L220: */ - } - erretm = abs(erretm); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); + clacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L130: */ + } -/* Evaluate PHI and the derivative DPHI */ +/* W := W * V1' */ - dphi = 0.; - phi = 0.; - i__1 = iip1; - for (j = *n; j >= i__1; --j) { - temp = z__[j] / delta[j]; - phi += z__[j] * temp; - dphi += temp * temp; - erretm += phi; -/* L230: */ - } + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, + &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); + if (*m > *k) { - temp = z__[ii] / delta[ii]; - dw = dpsi + dphi + temp * temp; - temp = z__[ii] * temp; - w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. - + abs(tau) * dw; - if (w * prew > 0. && abs(w) > abs(prew) / 10.) { - swtch = ! 
swtch; - } +/* W := W + C2'*V2' */ -/* L240: */ - } + i__1 = *m - *k; + cgemm_("Conjugate transpose", "Conjugate transpose", n, k, + &i__1, &c_b56, &c__[*k + 1 + c_dim1], ldc, &v[(* + k + 1) * v_dim1 + 1], ldv, &c_b56, &work[ + work_offset], ldwork); + } -/* Return with INFO = 1, NITER = MAXIT and not converged */ +/* W := W * T' or W * T */ - *info = 1; - if (orgati) { - *dlam = d__[*i__] + tau; - } else { - *dlam = d__[ip1] + tau; - } + ctrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); - } +/* C := C - V' * W' */ -L250: + if (*m > *k) { - return 0; +/* C2 := C2 - V2' * W' */ -/* End of DLAED4 */ + i__1 = *m - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("Conjugate transpose", "Conjugate transpose", & + i__1, n, k, &q__1, &v[(*k + 1) * v_dim1 + 1], ldv, + &work[work_offset], ldwork, &c_b56, &c__[*k + 1 + + c_dim1], ldc); + } -} /* dlaed4_ */ +/* W := W * V1 */ -/* Subroutine */ int dlaed5_(integer *i__, doublereal *d__, doublereal *z__, - doublereal *delta, doublereal *rho, doublereal *dlam) -{ - /* System generated locals */ - doublereal d__1; + ctrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b56, + &v[v_offset], ldv, &work[work_offset], ldwork); - /* Builtin functions */ - double sqrt(doublereal); +/* C1 := C1 - W' */ - /* Local variables */ - static doublereal b, c__, w, del, tau, temp; + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * c_dim1; + i__4 = j + i__ * c_dim1; + r_cnjg(&q__2, &work[i__ + j * work_dim1]); + q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - + q__2.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L140: */ + } +/* L150: */ + } + } else if (lsame_(side, "R")) { /* - -- LAPACK routine (version 3.0) -- - Univ. 
of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 + Form C * H or C * H' where C = ( C1 C2 ) + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - Purpose - ======= + W := C1 +*/ - This subroutine computes the I-th eigenvalue of a symmetric rank-one - modification of a 2-by-2 diagonal matrix + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } - diag( D ) + RHO * Z * transpose(Z) . +/* W := W * V1' */ - The diagonal elements in the array D are assumed to satisfy + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, + &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); + if (*n > *k) { - D(i) < D(j) for i < j . +/* W := W + C2 * V2' */ - We also assume RHO > 0 and that the Euclidean norm of the vector - Z is one. + i__1 = *n - *k; + cgemm_("No transpose", "Conjugate transpose", m, k, &i__1, + &c_b56, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + + 1) * v_dim1 + 1], ldv, &c_b56, &work[ + work_offset], ldwork); + } - Arguments - ========= +/* W := W * T or W * T' */ - I (input) INTEGER - The index of the eigenvalue to be computed. I = 1 or I = 2. + ctrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); - D (input) DOUBLE PRECISION array, dimension (2) - The original eigenvalues. We assume D(1) < D(2). +/* C := C - W * V */ - Z (input) DOUBLE PRECISION array, dimension (2) - The components of the updating vector. + if (*n > *k) { - DELTA (output) DOUBLE PRECISION array, dimension (2) - The vector DELTA contains the information necessary - to construct the eigenvectors. +/* C2 := C2 - W * V2 */ - RHO (input) DOUBLE PRECISION - The scalar in the symmetric updating formula. 
+ i__1 = *n - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "No transpose", m, &i__1, k, &q__1, + &work[work_offset], ldwork, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b56, &c__[(*k + 1) * c_dim1 + 1], + ldc); + } - DLAM (output) DOUBLE PRECISION - The computed lambda_I, the I-th updated eigenvalue. +/* W := W * V1 */ - Further Details - =============== + ctrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b56, + &v[v_offset], ldv, &work[work_offset], ldwork); - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA +/* C1 := C1 - W */ - ===================================================================== -*/ + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * c_dim1; + i__4 = i__ + j * c_dim1; + i__5 = i__ + j * work_dim1; + q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L170: */ + } +/* L180: */ + } + } - /* Parameter adjustments */ - --delta; - --z__; - --d__; + } else { - /* Function Body */ - del = d__[2] - d__[1]; - if (*i__ == 1) { - w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.; - if (w > 0.) { - b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[1] * z__[1] * del; +/* + Let V = ( V1 V2 ) (V2: last K columns) + where V2 is unit lower triangular. +*/ -/* B > ZERO, always */ + if (lsame_(side, "L")) { - tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); - *dlam = d__[1] + tau; - delta[1] = -z__[1] / tau; - delta[2] = z__[2] / (del - tau); - } else { - b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * del; - if (b > 0.) { - tau = c__ * -2. 
/ (b + sqrt(b * b + c__ * 4.)); - } else { - tau = (b - sqrt(b * b + c__ * 4.)) / 2.; - } - *dlam = d__[2] + tau; - delta[1] = -z__[1] / (del + tau); - delta[2] = -z__[2] / tau; - } - temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); - delta[1] /= temp; - delta[2] /= temp; - } else { +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) -/* Now I=2 */ + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); - c__ = *rho * z__[2] * z__[2] * del; - if (b > 0.) { - tau = (b + sqrt(b * b + c__ * 4.)) / 2.; - } else { - tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); - } - *dlam = d__[2] + tau; - delta[1] = -z__[1] / (del + tau); - delta[2] = -z__[2] / tau; - temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); - delta[1] /= temp; - delta[2] /= temp; - } - return 0; + W := C2' +*/ -/* End OF DLAED5 */ + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); + clacgv_(n, &work[j * work_dim1 + 1], &c__1); +/* L190: */ + } -} /* dlaed5_ */ +/* W := W * V2' */ -/* Subroutine */ int dlaed6_(integer *kniter, logical *orgati, doublereal * - rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * - tau, integer *info) -{ - /* Initialized data */ + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, + &c_b56, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (*m > *k) { - static logical first = TRUE_; +/* W := W + C1'*V1' */ - /* System generated locals */ - integer i__1; - doublereal d__1, d__2, d__3, d__4; + i__1 = *m - *k; + cgemm_("Conjugate transpose", "Conjugate transpose", n, k, + &i__1, &c_b56, &c__[c_offset], ldc, &v[v_offset], + ldv, &c_b56, &work[work_offset], ldwork); + } - /* Builtin functions */ - double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *); +/* W := W * T' or W * T */ - /* Local variables */ - static doublereal a, b, c__, f; - static integer i__; - static 
doublereal fc, df, ddf, eta, eps, base; - static integer iter; - static doublereal temp, temp1, temp2, temp3, temp4; - static logical scale; - static integer niter; - static doublereal small1, small2, sminv1, sminv2; + ctrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); - static doublereal dscale[3], sclfac, zscale[3], erretm, sclinv; +/* C := C - V' * W' */ + if (*m > *k) { -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 +/* C1 := C1 - V1' * W' */ + i__1 = *m - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("Conjugate transpose", "Conjugate transpose", & + i__1, n, k, &q__1, &v[v_offset], ldv, &work[ + work_offset], ldwork, &c_b56, &c__[c_offset], ldc); + } - Purpose - ======= +/* W := W * V2 */ - DLAED6 computes the positive or negative root (closest to the origin) - of - z(1) z(2) z(3) - f(x) = rho + --------- + ---------- + --------- - d(1)-x d(2)-x d(3)-x + ctrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b56, + &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); - It is assumed that +/* C2 := C2 - W' */ - if ORGATI = .true. the root is between d(2) and d(3); - otherwise it is between d(1) and d(2) + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = *m - *k + j + i__ * c_dim1; + i__4 = *m - *k + j + i__ * c_dim1; + r_cnjg(&q__2, &work[i__ + j * work_dim1]); + q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - + q__2.i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L200: */ + } +/* L210: */ + } - This routine will be called by DLAED4 when necessary. In most cases, - the root sought is the smallest in magnitude, though it might not be - in some extremely rare situations. 
+ } else if (lsame_(side, "R")) { - Arguments - ========= +/* + Form C * H or C * H' where C = ( C1 C2 ) - KNITER (input) INTEGER - Refer to DLAED4 for its significance. + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - ORGATI (input) LOGICAL - If ORGATI is true, the needed root is between d(2) and - d(3); otherwise it is between d(1) and d(2). See - DLAED4 for further details. + W := C2 +*/ - RHO (input) DOUBLE PRECISION - Refer to the equation f(x) above. + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + ccopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L220: */ + } - D (input) DOUBLE PRECISION array, dimension (3) - D satisfies d(1) < d(2) < d(3). +/* W := W * V2' */ - Z (input) DOUBLE PRECISION array, dimension (3) - Each of the elements in z must be positive. + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, + &c_b56, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (*n > *k) { - FINIT (input) DOUBLE PRECISION - The value of f at 0. It is more accurate than the one - evaluated inside this routine (if someone wants to do - so). +/* W := W + C1 * V1' */ - TAU (output) DOUBLE PRECISION - The root of the equation f(x). 
+ i__1 = *n - *k; + cgemm_("No transpose", "Conjugate transpose", m, k, &i__1, + &c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b56, &work[work_offset], ldwork); + } - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = 1, failure to converge +/* W := W * T or W * T' */ - Further Details - =============== + ctrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b56, &t[ + t_offset], ldt, &work[work_offset], ldwork); - Based on contributions by - Ren-Cang Li, Computer Science Division, University of California - at Berkeley, USA +/* C := C - W * V */ - ===================================================================== -*/ + if (*n > *k) { - /* Parameter adjustments */ - --z__; - --d__; +/* C1 := C1 - W * V1 */ - /* Function Body */ + i__1 = *n - *k; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "No transpose", m, &i__1, k, &q__1, + &work[work_offset], ldwork, &v[v_offset], ldv, & + c_b56, &c__[c_offset], ldc); + } - *info = 0; +/* W := W * V2 */ + + ctrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b56, + &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (*n - *k + j) * c_dim1; + i__4 = i__ + (*n - *k + j) * c_dim1; + i__5 = i__ + j * work_dim1; + q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ + i__4].i - work[i__5].i; + c__[i__3].r = q__1.r, c__[i__3].i = q__1.i; +/* L230: */ + } +/* L240: */ + } + + } - niter = 1; - *tau = 0.; - if (*kniter == 2) { - if (*orgati) { - temp = (d__[3] - d__[2]) / 2.; - c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); - a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; - b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; - } else { - temp = (d__[1] - d__[2]) / 2.; - c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); - a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; - b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; - } -/* 
Computing MAX */ - d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); - temp = max(d__1,d__2); - a /= temp; - b /= temp; - c__ /= temp; - if (c__ == 0.) { - *tau = b / a; - } else if (a <= 0.) { - *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); - } else { - *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)) - )); - } - temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) + - z__[3] / (d__[3] - *tau); - if (abs(*finit) <= abs(temp)) { - *tau = 0.; } } -/* - On first call to routine, get machine parameters for - possible scaling to avoid overflow -*/ + return 0; + +/* End of CLARFB */ + +} /* clarfb_ */ + +/* Subroutine */ int clarfg_(integer *n, complex *alpha, complex *x, integer * + incx, complex *tau) +{ + /* System generated locals */ + integer i__1; + real r__1, r__2; + complex q__1, q__2; + + /* Builtin functions */ + double r_imag(complex *), r_sign(real *, real *); + + /* Local variables */ + static integer j, knt; + static real beta; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + static real alphi, alphr, xnorm; + extern doublereal scnrm2_(integer *, complex *, integer *), slapy3_(real * + , real *, real *); + extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *); + static real safmin, rsafmn; - if (first) { - eps = EPSILON; - base = BASE; - i__1 = (integer) (log(SAFEMINIMUM) / log(base) / 3.); - small1 = pow_di(&base, &i__1); - sminv1 = 1. / small1; - small2 = small1 * small1; - sminv2 = sminv1 * sminv1; - first = FALSE_; - } /* - Determine if scaling of inputs necessary to avoid overflow - when computing 1/TEMP**3 -*/ + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 - if (*orgati) { -/* Computing MIN */ - d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - * - tau, abs(d__2)); - temp = min(d__3,d__4); - } else { -/* Computing MIN */ - d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - * - tau, abs(d__2)); - temp = min(d__3,d__4); - } - scale = FALSE_; - if (temp <= small1) { - scale = TRUE_; - if (temp <= small2) { -/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ + Purpose + ======= - sclfac = sminv2; - sclinv = small2; - } else { + CLARFG generates a complex elementary reflector H of order n, such + that -/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ + H' * ( alpha ) = ( beta ), H' * H = I. + ( x ) ( 0 ) - sclfac = sminv1; - sclinv = small1; - } + where alpha and beta are scalars, with beta real, and x is an + (n-1)-element complex vector. H is represented in the form -/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ + H = I - tau * ( 1 ) * ( 1 v' ) , + ( v ) - for (i__ = 1; i__ <= 3; ++i__) { - dscale[i__ - 1] = d__[i__] * sclfac; - zscale[i__ - 1] = z__[i__] * sclfac; -/* L10: */ - } - *tau *= sclfac; - } else { + where tau is a complex scalar and v is a complex (n-1)-element + vector. Note that H is not hermitian. -/* Copy D and Z to DSCALE and ZSCALE */ + If the elements of x are all zero and alpha is real, then tau = 0 + and H is taken to be the unit matrix. - for (i__ = 1; i__ <= 3; ++i__) { - dscale[i__ - 1] = d__[i__]; - zscale[i__ - 1] = z__[i__]; -/* L20: */ - } - } + Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . - fc = 0.; - df = 0.; - ddf = 0.; - for (i__ = 1; i__ <= 3; ++i__) { - temp = 1. 
/ (dscale[i__ - 1] - *tau); - temp1 = zscale[i__ - 1] * temp; - temp2 = temp1 * temp; - temp3 = temp2 * temp; - fc += temp1 / dscale[i__ - 1]; - df += temp2; - ddf += temp3; -/* L30: */ - } - f = *finit + *tau * fc; + Arguments + ========= - if (abs(f) <= 0.) { - goto L60; - } + N (input) INTEGER + The order of the elementary reflector. -/* - Iteration begins + ALPHA (input/output) COMPLEX + On entry, the value alpha. + On exit, it is overwritten with the value beta. - It is not hard to see that + X (input/output) COMPLEX array, dimension + (1+(N-2)*abs(INCX)) + On entry, the vector x. + On exit, it is overwritten with the vector v. - 1) Iterations will go up monotonically - if FINIT < 0; + INCX (input) INTEGER + The increment between elements of X. INCX > 0. - 2) Iterations will go down monotonically - if FINIT > 0. + TAU (output) COMPLEX + The value tau. + + ===================================================================== */ - iter = niter + 1; - for (niter = iter; niter <= 20; ++niter) { + /* Parameter adjustments */ + --x; - if (*orgati) { - temp1 = dscale[1] - *tau; - temp2 = dscale[2] - *tau; - } else { - temp1 = dscale[0] - *tau; - temp2 = dscale[1] - *tau; - } - a = (temp1 + temp2) * f - temp1 * temp2 * df; - b = temp1 * temp2 * f; - c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; -/* Computing MAX */ - d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); - temp = max(d__1,d__2); - a /= temp; - b /= temp; - c__ /= temp; - if (c__ == 0.) { - eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ - * 2.); - } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) - ); - } - if (f * eta >= 0.) { - eta = -f / df; - } + /* Function Body */ + if (*n <= 0) { + tau->r = 0.f, tau->i = 0.f; + return 0; + } - temp = eta + *tau; - if (*orgati) { - if (eta > 0. 
&& temp >= dscale[2]) { - eta = (dscale[2] - *tau) / 2.; + i__1 = *n - 1; + xnorm = scnrm2_(&i__1, &x[1], incx); + alphr = alpha->r; + alphi = r_imag(alpha); + + if (xnorm == 0.f && alphi == 0.f) { + +/* H = I */ + + tau->r = 0.f, tau->i = 0.f; + } else { + +/* general case */ + + r__1 = slapy3_(&alphr, &alphi, &xnorm); + beta = -r_sign(&r__1, &alphr); + safmin = slamch_("S") / slamch_("E"); + rsafmn = 1.f / safmin; + + if (dabs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + knt = 0; +L10: + ++knt; + i__1 = *n - 1; + csscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + alphi *= rsafmn; + alphr *= rsafmn; + if (dabs(beta) < safmin) { + goto L10; } - if (eta < 0. && temp <= dscale[1]) { - eta = (dscale[1] - *tau) / 2.; + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = scnrm2_(&i__1, &x[1], incx); + q__1.r = alphr, q__1.i = alphi; + alpha->r = q__1.r, alpha->i = q__1.i; + r__1 = slapy3_(&alphr, &alphi, &xnorm); + beta = -r_sign(&r__1, &alphr); + r__1 = (beta - alphr) / beta; + r__2 = -alphi / beta; + q__1.r = r__1, q__1.i = r__2; + tau->r = q__1.r, tau->i = q__1.i; + q__2.r = alpha->r - beta, q__2.i = alpha->i; + cladiv_(&q__1, &c_b56, &q__2); + alpha->r = q__1.r, alpha->i = q__1.i; + i__1 = *n - 1; + cscal_(&i__1, alpha, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + alpha->r = beta, alpha->i = 0.f; + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + q__1.r = safmin * alpha->r, q__1.i = safmin * alpha->i; + alpha->r = q__1.r, alpha->i = q__1.i; +/* L20: */ } } else { - if (eta > 0. && temp >= dscale[1]) { - eta = (dscale[1] - *tau) / 2.; - } - if (eta < 0. && temp <= dscale[0]) { - eta = (dscale[0] - *tau) / 2.; - } - } - *tau += eta; - - fc = 0.; - erretm = 0.; - df = 0.; - ddf = 0.; - for (i__ = 1; i__ <= 3; ++i__) { - temp = 1. 
/ (dscale[i__ - 1] - *tau); - temp1 = zscale[i__ - 1] * temp; - temp2 = temp1 * temp; - temp3 = temp2 * temp; - temp4 = temp1 / dscale[i__ - 1]; - fc += temp4; - erretm += abs(temp4); - df += temp2; - ddf += temp3; -/* L40: */ - } - f = *finit + *tau * fc; - erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df; - if (abs(f) <= eps * erretm) { - goto L60; + r__1 = (beta - alphr) / beta; + r__2 = -alphi / beta; + q__1.r = r__1, q__1.i = r__2; + tau->r = q__1.r, tau->i = q__1.i; + q__2.r = alpha->r - beta, q__2.i = alpha->i; + cladiv_(&q__1, &c_b56, &q__2); + alpha->r = q__1.r, alpha->i = q__1.i; + i__1 = *n - 1; + cscal_(&i__1, alpha, &x[1], incx); + alpha->r = beta, alpha->i = 0.f; } -/* L50: */ } - *info = 1; -L60: - -/* Undo scaling */ - if (scale) { - *tau *= sclinv; - } return 0; -/* End of DLAED6 */ +/* End of CLARFG */ -} /* dlaed6_ */ +} /* clarfg_ */ -/* Subroutine */ int dlaed7_(integer *icompq, integer *n, integer *qsiz, - integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, - doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer - *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * - perm, integer *givptr, integer *givcol, doublereal *givnum, - doublereal *work, integer *iwork, integer *info) +/* Subroutine */ int clarft_(char *direct, char *storev, integer *n, integer * + k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt) { /* System generated locals */ - integer q_dim1, q_offset, i__1, i__2; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4; + complex q__1; /* Local variables */ - static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer indxc, indxp; - extern /* 
Subroutine */ int dlaed8_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *, integer *, - doublereal *, integer *, integer *, integer *), dlaed9_(integer *, - integer *, integer *, integer *, doublereal *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - integer *, integer *), dlaeda_(integer *, integer *, integer *, - integer *, integer *, integer *, integer *, integer *, doublereal - *, doublereal *, integer *, doublereal *, doublereal *, integer *) - ; - static integer idlmda; - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *); - static integer coltyp; + static integer i__, j; + static complex vii; + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, + complex *, integer *, complex *, integer *), clacgv_(integer *, complex *, integer *); /* - -- LAPACK routine (version 3.0) -- + -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 @@ -11654,1266 +12546,2799 @@ L60: Purpose ======= - DLAED7 computes the updated eigensystem of a diagonal - matrix after modification by a rank-one symmetric matrix. This - routine is used only for the eigenproblem which requires all - eigenvalues and optionally eigenvectors of a dense symmetric matrix - that has been reduced to tridiagonal form. DLAED1 handles - the case in which all eigenvalues and eigenvectors of a symmetric - tridiagonal matrix are desired. 
- - T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + CLARFT forms the triangular factor T of a complex block reflector H + of order n, which is defined as a product of k elementary reflectors. - where Z = Q'u, u is a vector of length N with ones in the - CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; - The eigenvectors of the original matrix are stored in Q, and the - eigenvalues are in D. The algorithm consists of three stages: + If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. - The first stage consists of deflating the size of the problem - when there are multiple eigenvalues or if there is a zero in - the Z vector. For each such occurence the dimension of the - secular equation problem is reduced by one. This stage is - performed by the routine DLAED8. + If STOREV = 'C', the vector which defines the elementary reflector + H(i) is stored in the i-th column of the array V, and - The second stage consists of calculating the updated - eigenvalues. This is done by finding the roots of the secular - equation via the routine DLAED4 (as called by DLAED9). - This routine also calculates the eigenvectors of the current - problem. + H = I - V * T * V' - The final stage consists of computing the updated eigenvectors - directly using the updated eigenvalues. The eigenvectors for - the current problem are multiplied with the eigenvectors from - the overall problem. + If STOREV = 'R', the vector which defines the elementary reflector + H(i) is stored in the i-th row of the array V, and + + H = I - V' * T * V Arguments ========= - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. 
+ DIRECT (input) CHARACTER*1 + Specifies the order in which the elementary reflectors are + multiplied to form the block reflector: + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + STOREV (input) CHARACTER*1 + Specifies how the vectors which define the elementary + reflectors are stored (see also Further Details): + = 'C': columnwise + = 'R': rowwise - TLVLS (input) INTEGER - The total number of merging levels in the overall divide and - conquer tree. + N (input) INTEGER + The order of the block reflector H. N >= 0. - CURLVL (input) INTEGER - The current level in the overall merge routine, - 0 <= CURLVL <= TLVLS. + K (input) INTEGER + The order of the triangular factor T (= the number of + elementary reflectors). K >= 1. - CURPBM (input) INTEGER - The current problem in the current level in the overall - merge routine (counting from upper left to lower right). + V (input/output) COMPLEX array, dimension + (LDV,K) if STOREV = 'C' + (LDV,N) if STOREV = 'R' + The matrix V. See further details. - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the eigenvalues of the rank-1-perturbed matrix. - On exit, the eigenvalues of the repaired matrix. + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. - Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) - On entry, the eigenvectors of the rank-1-perturbed matrix. - On exit, the eigenvectors of the repaired tridiagonal matrix. + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i). - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). + T (output) COMPLEX array, dimension (LDT,K) + The k by k triangular factor T of the block reflector. 
+ If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is + lower triangular. The rest of the array is not used. - INDXQ (output) INTEGER array, dimension (N) - The permutation which will reintegrate the subproblem just - solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) - will be in ascending order. + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. - RHO (input) DOUBLE PRECISION - The subdiagonal element used to create the rank-1 - modification. + Further Details + =============== - CUTPNT (input) INTEGER - Contains the location of the last eigenvalue in the leading - sub-matrix. min(1,N) <= CUTPNT <= N. + The shape of the matrix V and the storage of the vectors which define + the H(i) is best illustrated by the following example with n = 5 and + k = 3. The elements equal to 1 are not stored; the corresponding + array elements are modified but restored on exit. The rest of the + array is not used. - QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) - Stores eigenvectors of submatrices encountered during - divide and conquer, packed together. QPTR points to - beginning of the submatrices. + DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': - QPTR (input/output) INTEGER array, dimension (N+2) - List of indices pointing to beginning of submatrices stored - in QSTORE. The submatrices are numbered starting at the - bottom left of the divide and conquer tree, from left to - right and bottom to top. + V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) + ( v1 1 ) ( 1 v2 v2 v2 ) + ( v1 v2 1 ) ( 1 v3 v3 ) + ( v1 v2 v3 ) + ( v1 v2 v3 ) - PRMPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in PERM a - level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) - indicates the size of the permutation and also the size of - the full, non-deflated problem. 
+ DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': - PERM (input) INTEGER array, dimension (N lg N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. + V = ( v1 v2 v3 ) V = ( v1 v1 1 ) + ( v1 v2 v3 ) ( v2 v2 v2 1 ) + ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) + ( 1 v3 ) + ( 1 ) - GIVPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in GIVCOL a - level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) - indicates the number of Givens rotations. + ===================================================================== - GIVCOL (input) INTEGER array, dimension (2, N lg N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) - Each number indicates the S value to be used in the - corresponding Givens rotation. + Quick return if possible +*/ - WORK (workspace) DOUBLE PRECISION array, dimension (3*N+QSIZ*N) + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; - IWORK (workspace) INTEGER array, dimension (4*N) + /* Function Body */ + if (*n == 0) { + return 0; + } - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. 
- > 0: if INFO = 1, an eigenvalue did not converge + if (lsame_(direct, "F")) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + if (tau[i__2].r == 0.f && tau[i__2].i == 0.f) { - Further Details - =============== +/* H(i) = I */ - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + i__3 = j + i__ * t_dim1; + t[i__3].r = 0.f, t[i__3].i = 0.f; +/* L10: */ + } + } else { - ===================================================================== +/* general case */ + i__2 = i__ + i__ * v_dim1; + vii.r = v[i__2].r, vii.i = v[i__2].i; + i__2 = i__ + i__ * v_dim1; + v[i__2].r = 1.f, v[i__2].i = 0.f; + if (lsame_(storev, "C")) { - Test the input parameters. -*/ +/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --qstore; - --qptr; - --prmptr; - --perm; - --givptr; - givcol -= 3; - givnum -= 3; - --work; - --iwork; + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + i__4 = i__; + q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; + cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &v[i__ + + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, & + c_b55, &t[i__ * t_dim1 + 1], &c__1); + } else { - /* Function Body */ - *info = 0; +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*icompq == 1 && *qsiz < *n) { - *info = -4; - } else if (*ldq < max(1,*n)) { - *info = -9; - } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { - *info = -12; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED7", &i__1); - return 0; - } + if (i__ < *n) { + i__2 = *n - i__; + clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); + } + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + i__4 = i__; + q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; + cgemv_("No transpose", &i__2, &i__3, 
&q__1, &v[i__ * + v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & + c_b55, &t[i__ * t_dim1 + 1], &c__1); + if (i__ < *n) { + i__2 = *n - i__; + clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); + } + } + i__2 = i__ + i__ * v_dim1; + v[i__2].r = vii.r, v[i__2].i = vii.i; -/* Quick return if possible */ +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ - if (*n == 0) { - return 0; - } + i__2 = i__ - 1; + ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ + i__ * t_dim1; + i__3 = i__; + t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + } +/* L20: */ + } + } else { + for (i__ = *k; i__ >= 1; --i__) { + i__1 = i__; + if (tau[i__1].r == 0.f && tau[i__1].i == 0.f) { -/* - The following values are for bookkeeping purposes only. They are - integer pointers which indicate the portion of the workspace - used by a particular array in DLAED8 and DLAED9. -*/ +/* H(i) = I */ - if (*icompq == 1) { - ldq2 = *qsiz; - } else { - ldq2 = *n; - } + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + i__2 = j + i__ * t_dim1; + t[i__2].r = 0.f, t[i__2].i = 0.f; +/* L30: */ + } + } else { - iz = 1; - idlmda = iz + *n; - iw = idlmda + *n; - iq2 = iw + *n; - is = iq2 + *n * ldq2; +/* general case */ - indx = 1; - indxc = indx + *n; - coltyp = indxc + *n; - indxp = coltyp + *n; + if (i__ < *k) { + if (lsame_(storev, "C")) { + i__1 = *n - *k + i__ + i__ * v_dim1; + vii.r = v[i__1].r, vii.i = v[i__1].i; + i__1 = *n - *k + i__ + i__ * v_dim1; + v[i__1].r = 1.f, v[i__1].i = 0.f; /* - Form the z-vector which consists of the last row of Q_1 and the - first row of Q_2. 
+ T(i+1:k,i) := + - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) */ - ptr = pow_ii(&c__2, tlvls) + 1; - i__1 = *curlvl - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *tlvls - i__; - ptr += pow_ii(&c__2, &i__2); -/* L10: */ - } - curr = ptr + *curpbm; - dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & - givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz - + *n], info); + i__1 = *n - *k + i__; + i__2 = *k - i__; + i__3 = i__; + q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; + cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &v[ + (i__ + 1) * v_dim1 + 1], ldv, &v[i__ * v_dim1 + + 1], &c__1, &c_b55, &t[i__ + 1 + i__ * + t_dim1], &c__1); + i__1 = *n - *k + i__ + i__ * v_dim1; + v[i__1].r = vii.r, v[i__1].i = vii.i; + } else { + i__1 = i__ + (*n - *k + i__) * v_dim1; + vii.r = v[i__1].r, vii.i = v[i__1].i; + i__1 = i__ + (*n - *k + i__) * v_dim1; + v[i__1].r = 1.f, v[i__1].i = 0.f; /* - When solving the final problem, we no longer need the stored data, - so we will overwrite the data from this level onto the previously - used storage space. + T(i+1:k,i) := + - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' */ - if (*curlvl == *tlvls) { - qptr[curr] = 1; - prmptr[curr] = 1; - givptr[curr] = 1; - } - -/* Sort and Deflate eigenvalues. */ - - dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, - cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & - perm[prmptr[curr]], &givptr[curr + 1], &givcol[(givptr[curr] << 1) - + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp], &iwork[ - indx], info); - prmptr[curr + 1] = prmptr[curr] + *n; - givptr[curr + 1] += givptr[curr]; - -/* Solve Secular Equation. 
*/ - - if (k != 0) { - dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], - &work[iw], &qstore[qptr[curr]], &k, info); - if (*info != 0) { - goto L30; - } - if (*icompq == 1) { - dgemm_("N", "N", qsiz, &k, &k, &c_b15, &work[iq2], &ldq2, &qstore[ - qptr[curr]], &k, &c_b29, &q[q_offset], ldq); - } -/* Computing 2nd power */ - i__1 = k; - qptr[curr + 1] = qptr[curr] + i__1 * i__1; + i__1 = *n - *k + i__ - 1; + clacgv_(&i__1, &v[i__ + v_dim1], ldv); + i__1 = *k - i__; + i__2 = *n - *k + i__; + i__3 = i__; + q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; + cgemv_("No transpose", &i__1, &i__2, &q__1, &v[i__ + + 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & + c_b55, &t[i__ + 1 + i__ * t_dim1], &c__1); + i__1 = *n - *k + i__ - 1; + clacgv_(&i__1, &v[i__ + v_dim1], ldv); + i__1 = i__ + (*n - *k + i__) * v_dim1; + v[i__1].r = vii.r, v[i__1].i = vii.i; + } -/* Prepare the INDXQ sorting permutation. */ +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ - n1 = k; - n2 = *n - k; - dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); - } else { - qptr[curr + 1] = qptr[curr]; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - indxq[i__] = i__; -/* L20: */ + i__1 = *k - i__; + ctrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + } + i__1 = i__ + i__ * t_dim1; + i__2 = i__; + t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; + } +/* L40: */ } } - -L30: return 0; -/* End of DLAED7 */ +/* End of CLARFT */ -} /* dlaed7_ */ +} /* clarft_ */ -/* Subroutine */ int dlaed8_(integer *icompq, integer *k, integer *n, integer - *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, - doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, - doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer - *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer - *indx, integer *info) +/* Subroutine */ int clarfx_(char *side, integer *m, integer *n, 
complex *v, + complex *tau, complex *c__, integer *ldc, complex *work) { /* System generated locals */ - integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; - doublereal d__1; + integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, + i__9, i__10, i__11; + complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8, q__9, q__10, + q__11, q__12, q__13, q__14, q__15, q__16, q__17, q__18, q__19; /* Builtin functions */ - double sqrt(doublereal); + void r_cnjg(complex *, complex *); /* Local variables */ - static doublereal c__; - static integer i__, j; - static doublereal s, t; - static integer k2, n1, n2, jp, n1p1; - static doublereal eps, tau, tol; - static integer jlam, imax, jmax; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *), dscal_( - integer *, doublereal *, doublereal *, integer *), dcopy_(integer - *, doublereal *, integer *, doublereal *, integer *); - - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); + static integer j; + static complex t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, + v7, v8, v9, t10, v10, sum; + extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= - DLAED8 merges the two sets of eigenvalues together into a single - sorted set. Then it tries to deflate the size of the problem. - There are two ways in which deflation can occur: when two or more - eigenvalues are close together or if there is a tiny element in the - Z vector. For each such occurrence the order of the related secular - equation problem is reduced by one. - - Arguments - ========= - - ICOMPQ (input) INTEGER - = 0: Compute eigenvalues only. - = 1: Compute eigenvectors of original dense symmetric matrix - also. On entry, Q contains the orthogonal matrix used - to reduce the original matrix to tridiagonal form. - - K (output) INTEGER - The number of non-deflated eigenvalues, and the order of the - related secular equation. + CLARFX applies a complex elementary reflector H to a complex m by n + matrix C, from either the left or the right. H is represented in the + form - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. + H = I - tau * v * v' - QSIZ (input) INTEGER - The dimension of the orthogonal matrix used to reduce - the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + where tau is a complex scalar and v is a complex vector. - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the eigenvalues of the two submatrices to be - combined. On exit, the trailing (N-K) updated eigenvalues - (those which were deflated) sorted into increasing order. + If tau = 0, then H is taken to be the unit matrix - Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) - If ICOMPQ = 0, Q is not referenced. Otherwise, - on entry, Q contains the eigenvectors of the partially solved - system which has been previously updated in matrix - multiplies with other partially solved eigensystems. 
- On exit, Q contains the trailing (N-K) updated eigenvectors - (those which were deflated) in its last N-K columns. + This version uses inline code if H has order < 11. - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max(1,N). + Arguments + ========= - INDXQ (input) INTEGER array, dimension (N) - The permutation which separately sorts the two sub-problems - in D into ascending order. Note that elements in the second - half of this permutation must first have CUTPNT added to - their values in order to be accurate. + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H - RHO (input/output) DOUBLE PRECISION - On entry, the off-diagonal element associated with the rank-1 - cut which originally split the two submatrices which are now - being recombined. - On exit, RHO has been modified to the value required by - DLAED3. + M (input) INTEGER + The number of rows of the matrix C. - CUTPNT (input) INTEGER - The location of the last eigenvalue in the leading - sub-matrix. min(1,N) <= CUTPNT <= N. + N (input) INTEGER + The number of columns of the matrix C. - Z (input) DOUBLE PRECISION array, dimension (N) - On entry, Z contains the updating vector (the last row of - the first sub-eigenvector matrix and the first row of the - second sub-eigenvector matrix). - On exit, the contents of Z are destroyed by the updating - process. + V (input) COMPLEX array, dimension (M) if SIDE = 'L' + or (N) if SIDE = 'R' + The vector v in the representation of H. - DLAMDA (output) DOUBLE PRECISION array, dimension (N) - A copy of the first K eigenvalues which will be used by - DLAED3 to form the secular equation. + TAU (input) COMPLEX + The value tau in the representation of H. - Q2 (output) DOUBLE PRECISION array, dimension (LDQ2,N) - If ICOMPQ = 0, Q2 is not referenced. Otherwise, - a copy of the first K eigenvectors which will be used by - DLAED7 in a matrix multiply (DGEMM) to update the new - eigenvectors. 
+ C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. - LDQ2 (input) INTEGER - The leading dimension of the array Q2. LDQ2 >= max(1,N). + LDC (input) INTEGER + The leading dimension of the array C. LDA >= max(1,M). - W (output) DOUBLE PRECISION array, dimension (N) - The first k values of the final deflation-altered z-vector and - will be passed to DLAED3. + WORK (workspace) COMPLEX array, dimension (N) if SIDE = 'L' + or (M) if SIDE = 'R' + WORK is not referenced if H has order < 11. - PERM (output) INTEGER array, dimension (N) - The permutations (from deflation and sorting) to be applied - to each eigenblock. + ===================================================================== +*/ - GIVPTR (output) INTEGER - The number of Givens rotations which took place in this - subproblem. - GIVCOL (output) INTEGER array, dimension (2, N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; - GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) - Each number indicates the S value to be used in the - corresponding Givens rotation. + /* Function Body */ + if (tau->r == 0.f && tau->i == 0.f) { + return 0; + } + if (lsame_(side, "L")) { - INDXP (workspace) INTEGER array, dimension (N) - The permutation used to place deflated values of D at the end - of the array. INDXP(1:K) points to the nondeflated D-values - and INDXP(K+1:N) points to the deflated eigenvalues. +/* Form H * C, where H has order m. */ - INDX (workspace) INTEGER array, dimension (N) - The permutation used to sort the contents of D into ascending - order. 
+ switch (*m) { + case 1: goto L10; + case 2: goto L30; + case 3: goto L50; + case 4: goto L70; + case 5: goto L90; + case 6: goto L110; + case 7: goto L130; + case 8: goto L150; + case 9: goto L170; + case 10: goto L190; + } - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. +/* + Code for general M - Further Details - =============== + w := C'*v +*/ - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA + cgemv_("Conjugate transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1] + , &c__1, &c_b55, &work[1], &c__1); - ===================================================================== +/* C := C - tau * v * w' */ + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(m, n, &q__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], + ldc); + goto L410; +L10: - Test the input parameters. -*/ +/* Special code for 1 x 1 Householder */ - /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --indxq; - --z__; - --dlamda; - q2_dim1 = *ldq2; - q2_offset = 1 + q2_dim1; - q2 -= q2_offset; - --w; - --perm; - givcol -= 3; - givnum -= 3; - --indxp; - --indx; - - /* Function Body */ - *info = 0; - - if (*icompq < 0 || *icompq > 1) { - *info = -1; - } else if (*n < 0) { - *info = -3; - } else if (*icompq == 1 && *qsiz < *n) { - *info = -4; - } else if (*ldq < max(1,*n)) { - *info = -7; - } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { - *info = -10; - } else if (*ldq2 < max(1,*n)) { - *info = -14; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAED8", &i__1); - return 0; - } + q__3.r = tau->r * v[1].r - tau->i * v[1].i, q__3.i = tau->r * v[1].i + + tau->i * v[1].r; + r_cnjg(&q__4, &v[1]); + q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i + + q__3.i * q__4.r; + q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; + t1.r = q__1.r, t1.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; 
+ i__3 = j * c_dim1 + 1; + q__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, q__1.i = t1.r * + c__[i__3].i + t1.i * c__[i__3].r; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L20: */ + } + goto L410; +L30: -/* Quick return if possible */ +/* Special code for 2 x 2 Householder */ - if (*n == 0) { - return 0; - } + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__2.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__3.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L40: */ + } + goto L410; +L50: - n1 = *cutpnt; - n2 = *n - n1; - n1p1 = n1 + 1; +/* Special code for 3 x 3 Householder */ - if (*rho < 0.) 
{ - dscal_(&n2, &c_b151, &z__[n1p1], &c__1); - } + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__3.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__4.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i; + i__4 = j * c_dim1 + 3; + q__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__5.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L60: */ + } + goto L410; +L70: -/* Normalize z 
so that norm(z) = 1 */ +/* Special code for 4 x 4 Householder */ - t = 1. / sqrt(2.); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - indx[j] = j; -/* L10: */ - } - dscal_(n, &t, &z__[1], &c__1); - *rho = (d__1 = *rho * 2., abs(d__1)); + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__4.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__5.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__3.r = q__4.r + q__5.r, q__3.i = q__4.i + q__5.i; + i__4 = j * c_dim1 + 3; + q__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__6.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__2.r = q__3.r + q__6.r, q__2.i = q__3.i + q__6.i; + i__5 = j * c_dim1 + 4; + q__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__7.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + q__1.r = q__2.r + q__7.r, q__1.i = q__2.i + q__7.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + 
c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L80: */ + } + goto L410; +L90: -/* Sort the eigenvalues into increasing order */ +/* Special code for 5 x 5 Householder */ - i__1 = *n; - for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { - indxq[i__] += *cutpnt; -/* L20: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = d__[indxq[i__]]; - w[i__] = z__[indxq[i__]]; -/* L30: */ - } - i__ = 1; - j = *cutpnt + 1; - dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = dlamda[indx[i__]]; - z__[i__] = w[indx[i__]]; -/* L40: */ - } + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + 
r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + r_cnjg(&q__1, &v[5]); + v5.r = q__1.r, v5.i = q__1.i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__5.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__6.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__4.r = q__5.r + q__6.r, q__4.i = q__5.i + q__6.i; + i__4 = j * c_dim1 + 3; + q__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__7.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i; + i__5 = j * c_dim1 + 4; + q__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__8.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + q__2.r = q__3.r + q__8.r, q__2.i = q__3.i + q__8.i; + i__6 = j * c_dim1 + 5; + q__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__9.i = v5.r * + c__[i__6].i + v5.i * c__[i__6].r; + q__1.r = q__2.r + q__9.r, q__1.i = q__2.i + q__9.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = 
q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L100: */ + } + goto L410; +L110: -/* Calculate the allowable deflation tolerence */ +/* Special code for 6 x 6 Householder */ - imax = idamax_(n, &z__[1], &c__1); - jmax = idamax_(n, &d__[1], &c__1); - eps = EPSILON; - tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + r_cnjg(&q__1, &v[5]); + v5.r = q__1.r, v5.i = q__1.i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + r_cnjg(&q__1, &v[6]); + v6.r = q__1.r, v6.i = q__1.i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + i__1 = *n; + for (j = 1; j 
<= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__6.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__7.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__5.r = q__6.r + q__7.r, q__5.i = q__6.i + q__7.i; + i__4 = j * c_dim1 + 3; + q__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__8.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__4.r = q__5.r + q__8.r, q__4.i = q__5.i + q__8.i; + i__5 = j * c_dim1 + 4; + q__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__9.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + q__3.r = q__4.r + q__9.r, q__3.i = q__4.i + q__9.i; + i__6 = j * c_dim1 + 5; + q__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__10.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__2.r = q__3.r + q__10.r, q__2.i = q__3.i + q__10.i; + i__7 = j * c_dim1 + 6; + q__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__11.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__1.r = q__2.r + q__11.r, q__1.i = q__2.i + q__11.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = 
c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L120: */ + } + goto L410; +L130: -/* - If the rank-1 modifier is small enough, no more needs to be done - except to reorganize Q so that its columns correspond with the - elements in D. -*/ +/* Special code for 7 x 7 Householder */ - if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { - *k = 0; - if (*icompq == 0) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - perm[j] = indxq[indx[j]]; -/* L50: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - perm[j] = indxq[indx[j]]; - dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 - + 1], &c__1); -/* L60: */ - } - dlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = 
q__1.r, t4.i = q__1.i; + r_cnjg(&q__1, &v[5]); + v5.r = q__1.r, v5.i = q__1.i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + r_cnjg(&q__1, &v[6]); + v6.r = q__1.r, v6.i = q__1.i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + r_cnjg(&q__1, &v[7]); + v7.r = q__1.r, v7.i = q__1.i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__7.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__8.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__6.r = q__7.r + q__8.r, q__6.i = q__7.i + q__8.i; + i__4 = j * c_dim1 + 3; + q__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__9.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__5.r = q__6.r + q__9.r, q__5.i = q__6.i + q__9.i; + i__5 = j * c_dim1 + 4; + q__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__10.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__4.r = q__5.r + q__10.r, q__4.i = q__5.i + q__10.i; + i__6 = j * c_dim1 + 5; + q__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__11.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__3.r = q__4.r + q__11.r, q__3.i = q__4.i + q__11.i; + i__7 = j * c_dim1 + 6; + q__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__12.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__2.r = q__3.r + q__12.r, q__2.i = q__3.i + q__12.i; + i__8 = j * c_dim1 + 7; + q__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__13.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__1.r = q__2.r + q__13.r, q__1.i = q__2.i + q__13.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 
1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 7; + i__3 = j * c_dim1 + 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L140: */ } - return 0; - } + goto L410; +L150: -/* - If there are multiple eigenvalues then the problem deflates. Here - the number of equal eigenvalues are found. As each equal - eigenvalue is found, an elementary reflector is computed to rotate - the corresponding eigensubspace so that the corresponding - components of Z are zero in this new basis. 
-*/ +/* Special code for 8 x 8 Householder */ - *k = 0; - *givptr = 0; - k2 = *n + 1; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + r_cnjg(&q__1, &v[5]); + v5.r = q__1.r, v5.i = q__1.i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + r_cnjg(&q__1, &v[6]); + v6.r = q__1.r, v6.i = q__1.i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + r_cnjg(&q__1, &v[7]); + v7.r = q__1.r, v7.i = q__1.i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + r_cnjg(&q__1, &v[8]); + v8.r = q__1.r, v8.i = q__1.i; + r_cnjg(&q__2, &v8); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t8.r = q__1.r, t8.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__8.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + 
q__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__9.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__7.r = q__8.r + q__9.r, q__7.i = q__8.i + q__9.i; + i__4 = j * c_dim1 + 3; + q__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__10.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + q__6.r = q__7.r + q__10.r, q__6.i = q__7.i + q__10.i; + i__5 = j * c_dim1 + 4; + q__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__11.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__5.r = q__6.r + q__11.r, q__5.i = q__6.i + q__11.i; + i__6 = j * c_dim1 + 5; + q__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__12.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__4.r = q__5.r + q__12.r, q__4.i = q__5.i + q__12.i; + i__7 = j * c_dim1 + 6; + q__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__13.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__3.r = q__4.r + q__13.r, q__3.i = q__4.i + q__13.i; + i__8 = j * c_dim1 + 7; + q__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__14.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__2.r = q__3.r + q__14.r, q__2.i = q__3.i + q__14.i; + i__9 = j * c_dim1 + 8; + q__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__15.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + q__1.r = q__2.r + q__15.r, q__1.i = q__2.i + q__15.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r 
= q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 7; + i__3 = j * c_dim1 + 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 8; + i__3 = j * c_dim1 + 8; + q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + + sum.i * t8.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L160: */ + } + goto L410; +L170: -/* Deflate due to small z component. 
*/ +/* Special code for 9 x 9 Householder */ - --k2; - indxp[k2] = j; - if (j == *n) { - goto L110; - } - } else { - jlam = j; - goto L80; + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + r_cnjg(&q__1, &v[5]); + v5.r = q__1.r, v5.i = q__1.i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + r_cnjg(&q__1, &v[6]); + v6.r = q__1.r, v6.i = q__1.i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + r_cnjg(&q__1, &v[7]); + v7.r = q__1.r, v7.i = q__1.i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + r_cnjg(&q__1, &v[8]); + v8.r = q__1.r, v8.i = q__1.i; + r_cnjg(&q__2, &v8); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t8.r = q__1.r, t8.i = q__1.i; + r_cnjg(&q__1, &v[9]); + v9.r = q__1.r, v9.i = q__1.i; + r_cnjg(&q__2, &v9); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t9.r = q__1.r, t9.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) 
{ + i__2 = j * c_dim1 + 1; + q__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__9.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__10.i = v2.r + * c__[i__3].i + v2.i * c__[i__3].r; + q__8.r = q__9.r + q__10.r, q__8.i = q__9.i + q__10.i; + i__4 = j * c_dim1 + 3; + q__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__11.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + q__7.r = q__8.r + q__11.r, q__7.i = q__8.i + q__11.i; + i__5 = j * c_dim1 + 4; + q__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__12.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__6.r = q__7.r + q__12.r, q__6.i = q__7.i + q__12.i; + i__6 = j * c_dim1 + 5; + q__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__13.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__5.r = q__6.r + q__13.r, q__5.i = q__6.i + q__13.i; + i__7 = j * c_dim1 + 6; + q__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__14.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__4.r = q__5.r + q__14.r, q__4.i = q__5.i + q__14.i; + i__8 = j * c_dim1 + 7; + q__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__15.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__3.r = q__4.r + q__15.r, q__3.i = q__4.i + q__15.i; + i__9 = j * c_dim1 + 8; + q__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__16.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + q__2.r = q__3.r + q__16.r, q__2.i = q__3.i + q__16.i; + i__10 = j * c_dim1 + 9; + q__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__17.i = + v9.r * c__[i__10].i + v9.i * c__[i__10].r; + q__1.r = q__2.r + q__17.r, q__1.i = q__2.i + q__17.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, 
q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 7; + i__3 = j * c_dim1 + 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 8; + i__3 = j * c_dim1 + 8; + q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + + sum.i * t8.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 9; + i__3 = j * c_dim1 + 9; + q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + + sum.i * t9.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L180: */ } -/* L70: */ - } -L80: - ++j; - if (j > *n) { - goto L100; - } - if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + goto L410; +L190: -/* Deflate due to 
small z component. */ +/* Special code for 10 x 10 Householder */ - --k2; - indxp[k2] = j; + r_cnjg(&q__1, &v[1]); + v1.r = q__1.r, v1.i = q__1.i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + r_cnjg(&q__1, &v[2]); + v2.r = q__1.r, v2.i = q__1.i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + r_cnjg(&q__1, &v[3]); + v3.r = q__1.r, v3.i = q__1.i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + r_cnjg(&q__1, &v[4]); + v4.r = q__1.r, v4.i = q__1.i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + r_cnjg(&q__1, &v[5]); + v5.r = q__1.r, v5.i = q__1.i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + r_cnjg(&q__1, &v[6]); + v6.r = q__1.r, v6.i = q__1.i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + r_cnjg(&q__1, &v[7]); + v7.r = q__1.r, v7.i = q__1.i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + r_cnjg(&q__1, &v[8]); + v8.r = q__1.r, v8.i = q__1.i; + r_cnjg(&q__2, &v8); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t8.r = q__1.r, t8.i = q__1.i; + r_cnjg(&q__1, &v[9]); + v9.r = q__1.r, v9.i = q__1.i; + r_cnjg(&q__2, &v9); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t9.r = q__1.r, t9.i = q__1.i; + r_cnjg(&q__1, &v[10]); + v10.r = q__1.r, v10.i = q__1.i; + r_cnjg(&q__2, &v10); + q__1.r 
= tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t10.r = q__1.r, t10.i = q__1.i; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j * c_dim1 + 1; + q__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__10.i = v1.r + * c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j * c_dim1 + 2; + q__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__11.i = v2.r + * c__[i__3].i + v2.i * c__[i__3].r; + q__9.r = q__10.r + q__11.r, q__9.i = q__10.i + q__11.i; + i__4 = j * c_dim1 + 3; + q__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__12.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + q__8.r = q__9.r + q__12.r, q__8.i = q__9.i + q__12.i; + i__5 = j * c_dim1 + 4; + q__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__13.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__7.r = q__8.r + q__13.r, q__7.i = q__8.i + q__13.i; + i__6 = j * c_dim1 + 5; + q__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__14.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__6.r = q__7.r + q__14.r, q__6.i = q__7.i + q__14.i; + i__7 = j * c_dim1 + 6; + q__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__15.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__5.r = q__6.r + q__15.r, q__5.i = q__6.i + q__15.i; + i__8 = j * c_dim1 + 7; + q__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__16.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__4.r = q__5.r + q__16.r, q__4.i = q__5.i + q__16.i; + i__9 = j * c_dim1 + 8; + q__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__17.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + q__3.r = q__4.r + q__17.r, q__3.i = q__4.i + q__17.i; + i__10 = j * c_dim1 + 9; + q__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__18.i = + v9.r * c__[i__10].i + v9.i * c__[i__10].r; + q__2.r = q__3.r + q__18.r, q__2.i = q__3.i + q__18.i; + i__11 = j * c_dim1 + 10; + q__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, q__19.i = + v10.r * c__[i__11].i + v10.i * c__[i__11].r; + q__1.r = q__2.r + q__19.r, q__1.i = q__2.i + q__19.i; + 
sum.r = q__1.r, sum.i = q__1.i; + i__2 = j * c_dim1 + 1; + i__3 = j * c_dim1 + 1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 2; + i__3 = j * c_dim1 + 2; + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 3; + i__3 = j * c_dim1 + 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 4; + i__3 = j * c_dim1 + 4; + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 5; + i__3 = j * c_dim1 + 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 6; + i__3 = j * c_dim1 + 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 7; + i__3 = j * c_dim1 + 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 8; + i__3 = j * c_dim1 + 8; + q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + + sum.i * t8.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 9; + i__3 = j * c_dim1 + 
9; + q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + + sum.i * t9.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j * c_dim1 + 10; + i__3 = j * c_dim1 + 10; + q__2.r = sum.r * t10.r - sum.i * t10.i, q__2.i = sum.r * t10.i + + sum.i * t10.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L200: */ + } + goto L410; } else { -/* Check if eigenvalues are close enough to allow deflation. */ +/* Form C * H, where H has order n. */ - s = z__[jlam]; - c__ = z__[j]; + switch (*n) { + case 1: goto L210; + case 2: goto L230; + case 3: goto L250; + case 4: goto L270; + case 5: goto L290; + case 6: goto L310; + case 7: goto L330; + case 8: goto L350; + case 9: goto L370; + case 10: goto L390; + } /* - Find sqrt(a**2+b**2) without overflow or - destructive underflow. + Code for general N + + w := C * v */ - tau = dlapy2_(&c__, &s); - t = d__[j] - d__[jlam]; - c__ /= tau; - s = -s / tau; - if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + cgemv_("No transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1], & + c__1, &c_b55, &work[1], &c__1); -/* Deflation is possible. 
*/ +/* C := C - tau * w * v' */ - z__[j] = tau; - z__[jlam] = 0.; + q__1.r = -tau->r, q__1.i = -tau->i; + cgerc_(m, n, &q__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], + ldc); + goto L410; +L210: -/* Record the appropriate Givens rotation */ +/* Special code for 1 x 1 Householder */ - ++(*givptr); - givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; - givcol[(*givptr << 1) + 2] = indxq[indx[j]]; - givnum[(*givptr << 1) + 1] = c__; - givnum[(*givptr << 1) + 2] = s; - if (*icompq == 1) { - drot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ - indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); - } - t = d__[jlam] * c__ * c__ + d__[j] * s * s; - d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; - d__[jlam] = t; - --k2; - i__ = 1; -L90: - if (k2 + i__ <= *n) { - if (d__[jlam] < d__[indxp[k2 + i__]]) { - indxp[k2 + i__ - 1] = indxp[k2 + i__]; - indxp[k2 + i__] = jlam; - ++i__; - goto L90; - } else { - indxp[k2 + i__ - 1] = jlam; - } - } else { - indxp[k2 + i__ - 1] = jlam; - } - jlam = j; - } else { - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; - jlam = j; + q__3.r = tau->r * v[1].r - tau->i * v[1].i, q__3.i = tau->r * v[1].i + + tau->i * v[1].r; + r_cnjg(&q__4, &v[1]); + q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i + + q__3.i * q__4.r; + q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; + t1.r = q__1.r, t1.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, q__1.i = t1.r * + c__[i__3].i + t1.i * c__[i__3].r; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L220: */ } - } - goto L80; -L100: + goto L410; +L230: -/* Record the last eigenvalue. 
*/ +/* Special code for 2 x 2 Householder */ - ++(*k); - w[*k] = z__[jlam]; - dlamda[*k] = d__[jlam]; - indxp[*k] = jlam; + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__2.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__3.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L240: */ + } + goto L410; +L250: -L110: +/* Special code for 3 x 3 Householder */ -/* - Sort the eigenvalues and corresponding eigenvectors into DLAMDA - and Q2 respectively. The eigenvalues/vectors which were not - deflated go into the first K slots of DLAMDA and Q2 respectively, - while those which were deflated go into the last N - K slots. 
-*/ + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__3.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__4.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i; + i__4 = j + c_dim1 * 3; + q__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__5.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L260: */ + } + goto L410; +L270: - if (*icompq == 0) { - i__1 = *n; +/* Special code for 4 x 4 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); 
+ q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + i__1 = *m; for (j = 1; j <= i__1; ++j) { - jp = indxp[j]; - dlamda[j] = d__[jp]; - perm[j] = indxq[indx[jp]]; -/* L120: */ + i__2 = j + c_dim1; + q__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__4.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__5.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__3.r = q__4.r + q__5.r, q__3.i = q__4.i + q__5.i; + i__4 = j + c_dim1 * 3; + q__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__6.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__2.r = q__3.r + q__6.r, q__2.i = q__3.i + q__6.i; + i__5 = j + (c_dim1 << 2); + q__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__7.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + q__1.r = q__2.r + q__7.r, q__1.i = q__2.i + q__7.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; 
+ i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L280: */ } - } else { - i__1 = *n; + goto L410; +L290: + +/* Special code for 5 x 5 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + v5.r = v[5].r, v5.i = v[5].i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + i__1 = *m; for (j = 1; j <= i__1; ++j) { - jp = indxp[j]; - dlamda[j] = d__[jp]; - perm[j] = indxq[indx[jp]]; - dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] - , &c__1); -/* L130: */ + i__2 = j + c_dim1; + q__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__5.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__6.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__4.r = q__5.r + q__6.r, q__4.i = q__5.i + q__6.i; + i__4 = j + c_dim1 * 3; + 
q__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__7.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i; + i__5 = j + (c_dim1 << 2); + q__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__8.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + q__2.r = q__3.r + q__8.r, q__2.i = q__3.i + q__8.i; + i__6 = j + c_dim1 * 5; + q__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__9.i = v5.r * + c__[i__6].i + v5.i * c__[i__6].r; + q__1.r = q__2.r + q__9.r, q__1.i = q__2.i + q__9.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L300: */ } - } + goto L410; +L310: -/* - The deflated eigenvalues and their corresponding vectors go back - into the last N - K slots of D and Q respectively. 
-*/ +/* Special code for 6 x 6 Householder */ - if (*k < *n) { - if (*icompq == 0) { - i__1 = *n - *k; - dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); - } else { - i__1 = *n - *k; - dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); - i__1 = *n - *k; - dlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* - k + 1) * q_dim1 + 1], ldq); + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + v5.r = v[5].r, v5.i = v[5].i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + v6.r = v[6].r, v6.i = v[6].i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__6.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__7.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__5.r = q__6.r + q__7.r, q__5.i = q__6.i + q__7.i; + i__4 = j + c_dim1 * 3; + q__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__8.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__4.r = q__5.r + q__8.r, q__4.i = q__5.i + q__8.i; + i__5 = j + 
(c_dim1 << 2); + q__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__9.i = v4.r * + c__[i__5].i + v4.i * c__[i__5].r; + q__3.r = q__4.r + q__9.r, q__3.i = q__4.i + q__9.i; + i__6 = j + c_dim1 * 5; + q__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__10.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__2.r = q__3.r + q__10.r, q__2.i = q__3.i + q__10.i; + i__7 = j + c_dim1 * 6; + q__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__11.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__1.r = q__2.r + q__11.r, q__1.i = q__2.i + q__11.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = 
q__1.r, c__[i__2].i = q__1.i; +/* L320: */ } - } + goto L410; +L330: + +/* Special code for 7 x 7 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + v5.r = v[5].r, v5.i = v[5].i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + v6.r = v[6].r, v6.i = v[6].i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + v7.r = v[7].r, v7.i = v[7].i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__7.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__8.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__6.r = q__7.r + q__8.r, q__6.i = q__7.i + q__8.i; + i__4 = j + c_dim1 * 3; + q__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__9.i = v3.r * + c__[i__4].i + v3.i * c__[i__4].r; + q__5.r = q__6.r + q__9.r, q__5.i = q__6.i + q__9.i; + i__5 = j + (c_dim1 << 2); + q__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__10.i = v4.r + * 
c__[i__5].i + v4.i * c__[i__5].r; + q__4.r = q__5.r + q__10.r, q__4.i = q__5.i + q__10.i; + i__6 = j + c_dim1 * 5; + q__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__11.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__3.r = q__4.r + q__11.r, q__3.i = q__4.i + q__11.i; + i__7 = j + c_dim1 * 6; + q__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__12.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__2.r = q__3.r + q__12.r, q__2.i = q__3.i + q__12.i; + i__8 = j + c_dim1 * 7; + q__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__13.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__1.r = q__2.r + q__13.r, q__1.i = q__2.i + q__13.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * 
t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 7; + i__3 = j + c_dim1 * 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L340: */ + } + goto L410; +L350: + +/* Special code for 8 x 8 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + v5.r = v[5].r, v5.i = v[5].i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + v6.r = v[6].r, v6.i = v[6].i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + v7.r = v[7].r, v7.i = v[7].i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + v8.r = v[8].r, v8.i = v[8].i; + r_cnjg(&q__2, &v8); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t8.r = q__1.r, t8.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, 
q__8.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__9.i = v2.r * + c__[i__3].i + v2.i * c__[i__3].r; + q__7.r = q__8.r + q__9.r, q__7.i = q__8.i + q__9.i; + i__4 = j + c_dim1 * 3; + q__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__10.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + q__6.r = q__7.r + q__10.r, q__6.i = q__7.i + q__10.i; + i__5 = j + (c_dim1 << 2); + q__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__11.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__5.r = q__6.r + q__11.r, q__5.i = q__6.i + q__11.i; + i__6 = j + c_dim1 * 5; + q__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__12.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__4.r = q__5.r + q__12.r, q__4.i = q__5.i + q__12.i; + i__7 = j + c_dim1 * 6; + q__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__13.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__3.r = q__4.r + q__13.r, q__3.i = q__4.i + q__13.i; + i__8 = j + c_dim1 * 7; + q__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__14.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__2.r = q__3.r + q__14.r, q__2.i = q__3.i + q__14.i; + i__9 = j + (c_dim1 << 3); + q__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__15.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + q__1.r = q__2.r + q__15.r, q__1.i = q__2.i + q__15.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i 
* t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 7; + i__3 = j + c_dim1 * 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 3); + i__3 = j + (c_dim1 << 3); + q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + + sum.i * t8.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L360: */ + } + goto L410; +L370: + +/* Special code for 9 x 9 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, 
&v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + v5.r = v[5].r, v5.i = v[5].i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + v6.r = v[6].r, v6.i = v[6].i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + v7.r = v[7].r, v7.i = v[7].i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + v8.r = v[8].r, v8.i = v[8].i; + r_cnjg(&q__2, &v8); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t8.r = q__1.r, t8.i = q__1.i; + v9.r = v[9].r, v9.i = v[9].i; + r_cnjg(&q__2, &v9); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t9.r = q__1.r, t9.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__9.i = v1.r * + c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__10.i = v2.r + * c__[i__3].i + v2.i * c__[i__3].r; + q__8.r = q__9.r + q__10.r, q__8.i = q__9.i + q__10.i; + i__4 = j + c_dim1 * 3; + q__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__11.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + q__7.r = q__8.r + q__11.r, q__7.i = q__8.i + q__11.i; + i__5 = j + (c_dim1 << 2); + q__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__12.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__6.r = q__7.r + q__12.r, q__6.i = q__7.i + q__12.i; + i__6 = j + c_dim1 * 5; + q__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__13.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__5.r = q__6.r + q__13.r, q__5.i = q__6.i + q__13.i; + i__7 = j + c_dim1 * 6; + q__14.r = v6.r * c__[i__7].r 
- v6.i * c__[i__7].i, q__14.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__4.r = q__5.r + q__14.r, q__4.i = q__5.i + q__14.i; + i__8 = j + c_dim1 * 7; + q__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__15.i = v7.r + * c__[i__8].i + v7.i * c__[i__8].r; + q__3.r = q__4.r + q__15.r, q__3.i = q__4.i + q__15.i; + i__9 = j + (c_dim1 << 3); + q__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__16.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + q__2.r = q__3.r + q__16.r, q__2.i = q__3.i + q__16.i; + i__10 = j + c_dim1 * 9; + q__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__17.i = + v9.r * c__[i__10].i + v9.i * c__[i__10].r; + q__1.r = q__2.r + q__17.r, q__1.i = q__2.i + q__17.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + q__2.r = 
sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 7; + i__3 = j + c_dim1 * 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 3); + i__3 = j + (c_dim1 << 3); + q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + + sum.i * t8.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 9; + i__3 = j + c_dim1 * 9; + q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + + sum.i * t9.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L380: */ + } + goto L410; +L390: +/* Special code for 10 x 10 Householder */ + + v1.r = v[1].r, v1.i = v[1].i; + r_cnjg(&q__2, &v1); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t1.r = q__1.r, t1.i = q__1.i; + v2.r = v[2].r, v2.i = v[2].i; + r_cnjg(&q__2, &v2); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t2.r = q__1.r, t2.i = q__1.i; + v3.r = v[3].r, v3.i = v[3].i; + r_cnjg(&q__2, &v3); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t3.r = q__1.r, t3.i = q__1.i; + v4.r = v[4].r, v4.i = v[4].i; + r_cnjg(&q__2, &v4); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t4.r = q__1.r, t4.i = q__1.i; + v5.r = v[5].r, v5.i = v[5].i; + r_cnjg(&q__2, &v5); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t5.r = q__1.r, t5.i = q__1.i; + v6.r = v[6].r, v6.i = v[6].i; + r_cnjg(&q__2, &v6); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * 
q__2.i + + tau->i * q__2.r; + t6.r = q__1.r, t6.i = q__1.i; + v7.r = v[7].r, v7.i = v[7].i; + r_cnjg(&q__2, &v7); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t7.r = q__1.r, t7.i = q__1.i; + v8.r = v[8].r, v8.i = v[8].i; + r_cnjg(&q__2, &v8); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t8.r = q__1.r, t8.i = q__1.i; + v9.r = v[9].r, v9.i = v[9].i; + r_cnjg(&q__2, &v9); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t9.r = q__1.r, t9.i = q__1.i; + v10.r = v[10].r, v10.i = v[10].i; + r_cnjg(&q__2, &v10); + q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i + + tau->i * q__2.r; + t10.r = q__1.r, t10.i = q__1.i; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + i__2 = j + c_dim1; + q__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__10.i = v1.r + * c__[i__2].i + v1.i * c__[i__2].r; + i__3 = j + (c_dim1 << 1); + q__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__11.i = v2.r + * c__[i__3].i + v2.i * c__[i__3].r; + q__9.r = q__10.r + q__11.r, q__9.i = q__10.i + q__11.i; + i__4 = j + c_dim1 * 3; + q__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__12.i = v3.r + * c__[i__4].i + v3.i * c__[i__4].r; + q__8.r = q__9.r + q__12.r, q__8.i = q__9.i + q__12.i; + i__5 = j + (c_dim1 << 2); + q__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__13.i = v4.r + * c__[i__5].i + v4.i * c__[i__5].r; + q__7.r = q__8.r + q__13.r, q__7.i = q__8.i + q__13.i; + i__6 = j + c_dim1 * 5; + q__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__14.i = v5.r + * c__[i__6].i + v5.i * c__[i__6].r; + q__6.r = q__7.r + q__14.r, q__6.i = q__7.i + q__14.i; + i__7 = j + c_dim1 * 6; + q__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__15.i = v6.r + * c__[i__7].i + v6.i * c__[i__7].r; + q__5.r = q__6.r + q__15.r, q__5.i = q__6.i + q__15.i; + i__8 = j + c_dim1 * 7; + q__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__16.i = v7.r + * 
c__[i__8].i + v7.i * c__[i__8].r; + q__4.r = q__5.r + q__16.r, q__4.i = q__5.i + q__16.i; + i__9 = j + (c_dim1 << 3); + q__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__17.i = v8.r + * c__[i__9].i + v8.i * c__[i__9].r; + q__3.r = q__4.r + q__17.r, q__3.i = q__4.i + q__17.i; + i__10 = j + c_dim1 * 9; + q__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__18.i = + v9.r * c__[i__10].i + v9.i * c__[i__10].r; + q__2.r = q__3.r + q__18.r, q__2.i = q__3.i + q__18.i; + i__11 = j + c_dim1 * 10; + q__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, q__19.i = + v10.r * c__[i__11].i + v10.i * c__[i__11].r; + q__1.r = q__2.r + q__19.r, q__1.i = q__2.i + q__19.i; + sum.r = q__1.r, sum.i = q__1.i; + i__2 = j + c_dim1; + i__3 = j + c_dim1; + q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + + sum.i * t1.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 1); + i__3 = j + (c_dim1 << 1); + q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + + sum.i * t2.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 3; + i__3 = j + c_dim1 * 3; + q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + + sum.i * t3.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 2); + i__3 = j + (c_dim1 << 2); + q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + + sum.i * t4.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 5; + i__3 = j + c_dim1 * 5; + q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + + sum.i * t5.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 6; + i__3 = j + c_dim1 * 6; + q__2.r = sum.r * t6.r - sum.i * t6.i, 
q__2.i = sum.r * t6.i + + sum.i * t6.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 7; + i__3 = j + c_dim1 * 7; + q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + + sum.i * t7.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + (c_dim1 << 3); + i__3 = j + (c_dim1 << 3); + q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + + sum.i * t8.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 9; + i__3 = j + c_dim1 * 9; + q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + + sum.i * t9.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; + i__2 = j + c_dim1 * 10; + i__3 = j + c_dim1 * 10; + q__2.r = sum.r * t10.r - sum.i * t10.i, q__2.i = sum.r * t10.i + + sum.i * t10.r; + q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; + c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; +/* L400: */ + } + goto L410; + } +L410: return 0; -/* End of DLAED8 */ +/* End of CLARFX */ -} /* dlaed8_ */ +} /* clarfx_ */ -/* Subroutine */ int dlaed9_(integer *k, integer *kstart, integer *kstop, - integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * - rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, +/* Subroutine */ int clascl_(char *type__, integer *kl, integer *ku, real * + cfrom, real *cto, integer *m, integer *n, complex *a, integer *lda, integer *info) { /* System generated locals */ - integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1; /* Local variables */ - static integer i__, j; - static doublereal temp; - extern doublereal 
dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dlaed4_(integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *); - extern doublereal dlamc3_(doublereal *, doublereal *); + static integer i__, j, k1, k2, k3, k4; + static real mul, cto1; + static logical done; + static real ctoc; + extern logical lsame_(char *, char *); + static integer itype; + static real cfrom1; + extern doublereal slamch_(char *); + static real cfromc; extern /* Subroutine */ int xerbla_(char *, integer *); + static real bignum, smlnum; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 Purpose ======= - DLAED9 finds the roots of the secular equation, as defined by the - values in D, Z, and RHO, between KSTART and KSTOP. It makes the - appropriate calls to DLAED4 and then stores the new matrix of - eigenvectors for use in calculating the next level of Z vectors. + CLASCL multiplies the M by N complex matrix A by the real scalar + CTO/CFROM. This is done without over/underflow as long as the final + result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + A may be full, upper triangular, lower triangular, upper Hessenberg, + or banded. Arguments ========= - K (input) INTEGER - The number of terms in the rational function to be solved by - DLAED4. K >= 0. - - KSTART (input) INTEGER - KSTOP (input) INTEGER - The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP - are to be computed. 1 <= KSTART <= KSTOP <= K. - - N (input) INTEGER - The number of rows and columns in the Q matrix. - N >= K (delation may result in N > K). 
- - D (output) DOUBLE PRECISION array, dimension (N) - D(I) contains the updated eigenvalues - for KSTART <= I <= KSTOP. + TYPE (input) CHARACTER*1 + TYPE indices the storage type of the input matrix. + = 'G': A is a full matrix. + = 'L': A is a lower triangular matrix. + = 'U': A is an upper triangular matrix. + = 'H': A is an upper Hessenberg matrix. + = 'B': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the lower + half stored. + = 'Q': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the upper + half stored. + = 'Z': A is a band matrix with lower bandwidth KL and upper + bandwidth KU. - Q (workspace) DOUBLE PRECISION array, dimension (LDQ,N) + KL (input) INTEGER + The lower bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. - LDQ (input) INTEGER - The leading dimension of the array Q. LDQ >= max( 1, N ). + KU (input) INTEGER + The upper bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. - RHO (input) DOUBLE PRECISION - The value of the parameter in the rank one update equation. - RHO >= 0 required. + CFROM (input) REAL + CTO (input) REAL + The matrix A is multiplied by CTO/CFROM. A(I,J) is computed + without over/underflow if the final result CTO*A(I,J)/CFROM + can be represented without over/underflow. CFROM must be + nonzero. - DLAMDA (input) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles - of the secular equation. + M (input) INTEGER + The number of rows of the matrix A. M >= 0. - W (input) DOUBLE PRECISION array, dimension (K) - The first K elements of this array contain the components - of the deflation-adjusted updating vector. + N (input) INTEGER + The number of columns of the matrix A. N >= 0. 
- S (output) DOUBLE PRECISION array, dimension (LDS, K) - Will contain the eigenvectors of the repaired matrix which - will be stored for subsequent Z vector calculation and - multiplied by the previously accumulated eigenvectors - to update the system. + A (input/output) COMPLEX array, dimension (LDA,M) + The matrix to be multiplied by CTO/CFROM. See TYPE for the + storage type. - LDS (input) INTEGER - The leading dimension of S. LDS >= max( 1, K ). + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an eigenvalue did not converge - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA + 0 - successful exit + <0 - if INFO = -i, the i-th argument had an illegal value. ===================================================================== - Test the input parameters. 
+ Test the input arguments */ /* Parameter adjustments */ - --d__; - q_dim1 = *ldq; - q_offset = 1 + q_dim1; - q -= q_offset; - --dlamda; - --w; - s_dim1 = *lds; - s_offset = 1 + s_dim1; - s -= s_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; /* Function Body */ *info = 0; - if (*k < 0) { + if (lsame_(type__, "G")) { + itype = 0; + } else if (lsame_(type__, "L")) { + itype = 1; + } else if (lsame_(type__, "U")) { + itype = 2; + } else if (lsame_(type__, "H")) { + itype = 3; + } else if (lsame_(type__, "B")) { + itype = 4; + } else if (lsame_(type__, "Q")) { + itype = 5; + } else if (lsame_(type__, "Z")) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { *info = -1; - } else if (*kstart < 1 || *kstart > max(1,*k)) { - *info = -2; - } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { - *info = -3; - } else if (*n < *k) { + } else if (*cfrom == 0.f) { *info = -4; - } else if (*ldq < max(1,*k)) { + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { *info = -7; - } else if (*lds < max(1,*k)) { - *info = -12; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } } + if (*info != 0) { i__1 = -(*info); - xerbla_("DLAED9", &i__1); + xerbla_("CLASCL", &i__1); return 0; } /* Quick return if possible */ - if (*k == 0) { + if (*n == 0 || *m == 0) { return 0; } -/* - Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can - be computed with high relative accuracy (barring over/underflow). 
- This is a problem on machines without a guard digit in - add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). - The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), - which on any of these machines zeros out the bottommost - bit of DLAMDA(I) if it is 1; this makes the subsequent - subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation - occurs. On binary machines with a guard digit (almost all - machines) it does not change DLAMDA(I) at all. On hexadecimal - and decimal machines with a guard digit, it slightly - changes the bottommost bits of DLAMDA(I). It does not account - for hexadecimal or decimal machines without guard digits - (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating - this code. -*/ +/* Get machine parameters */ - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; -/* L10: */ + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + cto1 = ctoc / bignum; + if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (dabs(cto1) > dabs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; } - i__1 = *kstop; - for (j = *kstart; j <= i__1; ++j) { - dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], - info); + if (itype == 0) { -/* If the zero finder fails, the computation is terminated. 
*/ +/* Full matrix */ - if (*info != 0) { - goto L120; - } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; /* L20: */ - } - - if (*k == 1 || *k == 2) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *k; - for (j = 1; j <= i__2; ++j) { - s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; -/* L30: */ } -/* L40: */ +/* L30: */ } - goto L120; - } - -/* Compute updated W. */ - dcopy_(k, &w[1], &c__1, &s[s_offset], &c__1); + } else if (itype == 1) { -/* Initialize W(I) = Q(I,I) */ +/* Lower triangular matrix */ - i__1 = *ldq + 1; - dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L40: */ + } /* L50: */ } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; /* L60: */ - } + } /* L70: */ - } - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - d__1 = sqrt(-w[i__]); - w[i__] = d_sign(&d__1, &s[i__ + s_dim1]); -/* L80: */ - } + } -/* Compute eigenvectors of the modified rank-1 modification. 
*/ + } else if (itype == 3) { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L80: */ + } /* L90: */ } - temp = dnrm2_(k, &q[j * q_dim1 + 1], &c__1); - i__2 = *k; - for (i__ = 1; i__ <= i__2; ++i__) { - s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; /* L100: */ - } + } /* L110: */ + } + + } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + i__2 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ + + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + j * a_dim1; + q__1.r = mul * 
a[i__4].r, q__1.i = mul * a[i__4].i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L140: */ + } +/* L150: */ + } + + } + + if (! done) { + goto L10; } -L120: return 0; -/* End of DLAED9 */ +/* End of CLASCL */ -} /* dlaed9_ */ +} /* clascl_ */ -/* Subroutine */ int dlaeda_(integer *n, integer *tlvls, integer *curlvl, - integer *curpbm, integer *prmptr, integer *perm, integer *givptr, - integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, - doublereal *z__, doublereal *ztemp, integer *info) +/* Subroutine */ int claset_(char *uplo, integer *m, integer *n, complex * + alpha, complex *beta, complex *a, integer *lda) { /* System generated locals */ - integer i__1, i__2, i__3; - - /* Builtin functions */ - integer pow_ii(integer *, integer *); - double sqrt(doublereal); + integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ - static integer i__, k, mid, ptr; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer curr, bsiz1, bsiz2, psiz1, psiz2, zptr1; - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), dcopy_(integer *, - doublereal *, integer *, doublereal *, integer *), xerbla_(char *, - integer *); + static integer i__, j; + extern logical lsame_(char *, char *); /* - -- LAPACK routine (version 3.0) -- + -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + October 31, 1992 Purpose ======= - DLAEDA computes the Z vector corresponding to the merge step in the - CURLVLth step of the merge process with TLVLS steps for the CURPBMth - problem. + CLASET initializes a 2-D array A to BETA on the diagonal and + ALPHA on the offdiagonals. 
Arguments ========= - N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - TLVLS (input) INTEGER - The total number of merging levels in the overall divide and - conquer tree. - - CURLVL (input) INTEGER - The current level in the overall merge routine, - 0 <= curlvl <= tlvls. - - CURPBM (input) INTEGER - The current problem in the current level in the overall - merge routine (counting from upper left to lower right). - - PRMPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in PERM a - level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) - indicates the size of the permutation and incidentally the - size of the full, non-deflated problem. - - PERM (input) INTEGER array, dimension (N lg N) - Contains the permutations (from deflation and sorting) to be - applied to each eigenblock. - - GIVPTR (input) INTEGER array, dimension (N lg N) - Contains a list of pointers which indicate where in GIVCOL a - level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) - indicates the number of Givens rotations. - - GIVCOL (input) INTEGER array, dimension (2, N lg N) - Each pair of numbers indicates a pair of columns to take place - in a Givens rotation. - - GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) - Each number indicates the S value to be used in the - corresponding Givens rotation. - - Q (input) DOUBLE PRECISION array, dimension (N**2) - Contains the square eigenblocks from previous levels, the - starting positions for blocks are given by QPTR. + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be set. + = 'U': Upper triangular part is set. The lower triangle + is unchanged. + = 'L': Lower triangular part is set. The upper triangle + is unchanged. + Otherwise: All of the matrix A is set. - QPTR (input) INTEGER array, dimension (N+2) - Contains a list of pointers which indicate where in Q an - eigenblock is stored. 
SQRT( QPTR(i+1) - QPTR(i) ) indicates - the size of the block. + M (input) INTEGER + On entry, M specifies the number of rows of A. - Z (output) DOUBLE PRECISION array, dimension (N) - On output this vector contains the updating vector (the last - row of the first sub-eigenvector matrix and the first row of - the second sub-eigenvector matrix). + N (input) INTEGER + On entry, N specifies the number of columns of A. - ZTEMP (workspace) DOUBLE PRECISION array, dimension (N) + ALPHA (input) COMPLEX + All the offdiagonal array elements are set to ALPHA. - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. + BETA (input) COMPLEX + All the diagonal array elements are set to BETA. - Further Details - =============== + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; + A(i,i) = BETA , 1 <= i <= min(m,n) - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). ===================================================================== - - - Test the input parameters. */ + /* Parameter adjustments */ - --ztemp; - --z__; - --qptr; - --q; - givnum -= 3; - givcol -= 3; - --givptr; - --perm; - --prmptr; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; /* Function Body */ - *info = 0; - - if (*n < 0) { - *info = -1; - } - if (*info != 0) { - i__1 = -(*info); - xerbla_("DLAEDA", &i__1); - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - -/* Determine location of first number in second half. 
*/ - - mid = *n / 2 + 1; - -/* Gather last/first rows of appropriate eigenblocks into center of Z */ - - ptr = 1; - -/* - Determine location of lowest level subproblem in the full storage - scheme -*/ - - i__1 = *curlvl - 1; - curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; + if (lsame_(uplo, "U")) { /* - Determine size of these matrices. We add HALF to the value of - the SQRT in case the machine underestimates one of these square - roots. + Set the diagonal to BETA and the strictly upper triangular + part of the array to ALPHA. */ - bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + .5); - bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])) + - .5); - i__1 = mid - bsiz1 - 1; - for (k = 1; k <= i__1; ++k) { - z__[k] = 0.; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; /* L10: */ - } - dcopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & - c__1); - dcopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); - i__1 = *n; - for (k = mid + bsiz2; k <= i__1; ++k) { - z__[k] = 0.; + } /* L20: */ - } + } + i__1 = min(*n,*m); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L30: */ + } + + } else if (lsame_(uplo, "L")) { /* - Loop thru remaining levels 1 -> CURLVL applying the Givens - rotations and permutation and then multiplying the center matrices - against the current Z. + Set the diagonal to BETA and the strictly lower triangular + part of the array to ALPHA. 
*/ - ptr = pow_ii(&c__2, tlvls) + 1; - i__1 = *curlvl - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = *curlvl - k; - i__3 = *curlvl - k - 1; - curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - - 1; - psiz1 = prmptr[curr + 1] - prmptr[curr]; - psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; - zptr1 = mid - psiz1; - -/* Apply Givens at CURR and CURR+1 */ - - i__2 = givptr[curr + 1] - 1; - for (i__ = givptr[curr]; i__ <= i__2; ++i__) { - drot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & - z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( - i__ << 1) + 1], &givnum[(i__ << 1) + 2]); -/* L30: */ - } - i__2 = givptr[curr + 2] - 1; - for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { - drot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ - mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << - 1) + 1], &givnum[(i__ << 1) + 2]); + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; /* L40: */ - } - psiz1 = prmptr[curr + 1] - prmptr[curr]; - psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; - i__2 = psiz1 - 1; - for (i__ = 0; i__ <= i__2; ++i__) { - ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; + } /* L50: */ } - i__2 = psiz2 - 1; - for (i__ = 0; i__ <= i__2; ++i__) { - ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - - 1]; + i__1 = min(*n,*m); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; /* L60: */ } -/* - Multiply Blocks at CURR and CURR+1 + } else { - Determine size of these matrices. We add HALF to the value of - the SQRT in case the machine underestimates one of these - square roots. +/* + Set the array to BETA on the diagonal and ALPHA on the + offdiagonal. 
*/ - bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + - .5); - bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1]) - ) + .5); - if (bsiz1 > 0) { - dgemv_("T", &bsiz1, &bsiz1, &c_b15, &q[qptr[curr]], &bsiz1, & - ztemp[1], &c__1, &c_b29, &z__[zptr1], &c__1); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = alpha->r, a[i__3].i = alpha->i; +/* L70: */ + } +/* L80: */ } - i__2 = psiz1 - bsiz1; - dcopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); - if (bsiz2 > 0) { - dgemv_("T", &bsiz2, &bsiz2, &c_b15, &q[qptr[curr + 1]], &bsiz2, & - ztemp[psiz1 + 1], &c__1, &c_b29, &z__[mid], &c__1); + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + a[i__2].r = beta->r, a[i__2].i = beta->i; +/* L90: */ } - i__2 = psiz2 - bsiz2; - dcopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & - c__1); - - i__2 = *tlvls - k; - ptr += pow_ii(&c__2, &i__2); -/* L70: */ } return 0; -/* End of DLAEDA */ +/* End of CLASET */ -} /* dlaeda_ */ +} /* claset_ */ -/* Subroutine */ int dlaev2_(doublereal *a, doublereal *b, doublereal *c__, - doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1) +/* Subroutine */ int clasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, real *c__, real *s, complex *a, integer *lda) { /* System generated locals */ - doublereal d__1; - - /* Builtin functions */ - double sqrt(doublereal); + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + complex q__1, q__2, q__3; /* Local variables */ - static doublereal ab, df, cs, ct, tb, sm, tn, rt, adf, acs; - static integer sgn1, sgn2; - static doublereal acmn, acmx; + static integer i__, j, info; + static complex temp; + extern logical lsame_(char *, char *); + static real ctemp, stemp; + extern /* Subroutine */ int xerbla_(char *, integer *); /* @@ -12926,193 +15351,538 @@ L120: Purpose ======= - DLAEV2 
computes the eigendecomposition of a 2-by-2 symmetric matrix - [ A B ] - [ B C ]. - On return, RT1 is the eigenvalue of larger absolute value, RT2 is the - eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right - eigenvector for RT1, giving the decomposition + CLASR performs the transformation - [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] - [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. + A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) - Arguments - ========= + A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) - A (input) DOUBLE PRECISION - The (1,1) element of the 2-by-2 matrix. + where A is an m by n complex matrix and P is an orthogonal matrix, + consisting of a sequence of plane rotations determined by the + parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' + and z = n when SIDE = 'R' or 'r' ): - B (input) DOUBLE PRECISION - The (1,2) element and the conjugate of the (2,1) element of - the 2-by-2 matrix. + When DIRECT = 'F' or 'f' ( Forward sequence ) then - C (input) DOUBLE PRECISION - The (2,2) element of the 2-by-2 matrix. + P = P( z - 1 )*...*P( 2 )*P( 1 ), - RT1 (output) DOUBLE PRECISION - The eigenvalue of larger absolute value. + and when DIRECT = 'B' or 'b' ( Backward sequence ) then - RT2 (output) DOUBLE PRECISION - The eigenvalue of smaller absolute value. + P = P( 1 )*P( 2 )*...*P( z - 1 ), - CS1 (output) DOUBLE PRECISION - SN1 (output) DOUBLE PRECISION - The vector (CS1, SN1) is a unit right eigenvector for RT1. + where P( k ) is a plane rotation matrix for the following planes: - Further Details - =============== + when PIVOT = 'V' or 'v' ( Variable pivot ), + the plane ( k, k + 1 ) - RT1 is accurate to a few ulps barring over/underflow. 
+ when PIVOT = 'T' or 't' ( Top pivot ), + the plane ( 1, k + 1 ) - RT2 may be inaccurate if there is massive cancellation in the - determinant A*C-B*B; higher precision or correctly rounded or - correctly truncated arithmetic would be needed to compute RT2 - accurately in all cases. + when PIVOT = 'B' or 'b' ( Bottom pivot ), + the plane ( k, z ) - CS1 and SN1 are accurate to a few ulps barring over/underflow. + c( k ) and s( k ) must contain the cosine and sine that define the + matrix P( k ). The two by two plane rotation part of the matrix + P( k ), R( k ), is assumed to be of the form - Overflow is possible only if RT1 is within a factor of 5 of overflow. - Underflow is harmless if the input data is 0 or exceeds - underflow_threshold / macheps. + R( k ) = ( c( k ) s( k ) ). + ( -s( k ) c( k ) ) - ===================================================================== + Arguments + ========= + SIDE (input) CHARACTER*1 + Specifies whether the plane rotation matrix P is applied to + A on the left or the right. + = 'L': Left, compute A := P*A + = 'R': Right, compute A:= A*P' - Compute the eigenvalues -*/ + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) + = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) - sm = *a + *c__; - df = *a - *c__; - adf = abs(df); - tb = *b + *b; - ab = abs(tb); - if (abs(*a) > abs(*c__)) { - acmx = *a; - acmn = *c__; - } else { - acmx = *c__; - acmn = *a; - } - if (adf > ab) { -/* Computing 2nd power */ - d__1 = ab / adf; - rt = adf * sqrt(d__1 * d__1 + 1.); - } else if (adf < ab) { -/* Computing 2nd power */ - d__1 = adf / ab; - rt = ab * sqrt(d__1 * d__1 + 1.); - } else { + PIVOT (input) CHARACTER*1 + Specifies the plane for which P(k) is a plane rotation + matrix. 
+ = 'V': Variable pivot, the plane (k,k+1) + = 'T': Top pivot, the plane (1,k+1) + = 'B': Bottom pivot, the plane (k,z) -/* Includes case AB=ADF=0 */ + M (input) INTEGER + The number of rows of the matrix A. If m <= 1, an immediate + return is effected. - rt = ab * sqrt(2.); - } - if (sm < 0.) { - *rt1 = (sm - rt) * .5; - sgn1 = -1; + N (input) INTEGER + The number of columns of the matrix A. If n <= 1, an + immediate return is effected. -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ + C, S (input) REAL arrays, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + c(k) and s(k) contain the cosine and sine that define the + matrix P(k). The two by two plane rotation part of the + matrix P(k), R(k), is assumed to be of the form + R( k ) = ( c( k ) s( k ) ). + ( -s( k ) c( k ) ) - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else if (sm > 0.) { - *rt1 = (sm + rt) * .5; - sgn1 = 1; + A (input/output) COMPLEX array, dimension (LDA,N) + The m by n matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P' if SIDE = 'L'. -/* - Order of execution important. - To get fully accurate smaller eigenvalue, - next line needs to be executed in higher precision. -*/ + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). - *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; - } else { + ===================================================================== -/* Includes case RT1 = RT2 = 0 */ - *rt1 = rt * .5; - *rt2 = rt * -.5; - sgn1 = 1; + Test the input parameters +*/ + + /* Parameter adjustments */ + --c__; + --s; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! (lsame_(side, "L") || lsame_(side, "R"))) { + info = 1; + } else if (! (lsame_(pivot, "V") || lsame_(pivot, + "T") || lsame_(pivot, "B"))) { + info = 2; + } else if (! 
(lsame_(direct, "F") || lsame_(direct, + "B"))) { + info = 3; + } else if (*m < 0) { + info = 4; + } else if (*n < 0) { + info = 5; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("CLASR ", &info); + return 0; } -/* Compute the eigenvector */ +/* Quick return if possible */ - if (df >= 0.) { - cs = df + rt; - sgn2 = 1; - } else { - cs = df - rt; - sgn2 = -1; + if (*m == 0 || *n == 0) { + return 0; } - acs = abs(cs); - if (acs > ab) { - ct = -tb / cs; - *sn1 = 1. / sqrt(ct * ct + 1.); - *cs1 = ct * *sn1; - } else { - if (ab == 0.) { - *cs1 = 1.; - *sn1 = 0.; - } else { - tn = -cs / tb; - *cs1 = 1. / sqrt(tn * tn + 1.); - *sn1 = tn * *cs1; + if (lsame_(side, "L")) { + +/* Form P * A */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + 1 + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + 1 + i__ * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__4 = j + i__ * a_dim1; + q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ + i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = j + i__ * a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__4 = j + i__ * a_dim1; + q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L10: */ + } + } +/* L20: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + 1 + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + 1 + i__ * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__3 = j + i__ * a_dim1; + q__3.r = stemp * 
a[i__3].r, q__3.i = stemp * a[ + i__3].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = j + i__ * a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__3 = j + i__ * a_dim1; + q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ + i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L30: */ + } + } +/* L40: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + i__ * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__4 = i__ * a_dim1 + 1; + q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ + i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = i__ * a_dim1 + 1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__4 = i__ * a_dim1 + 1; + q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L50: */ + } + } +/* L60: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + i__ * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__3 = i__ * a_dim1 + 1; + q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ + i__3].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = i__ * a_dim1 + 1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__3 = i__ * a_dim1 + 1; + q__3.r = ctemp * a[i__3].r, q__3.i = 
ctemp * a[ + i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = j + i__ * a_dim1; + i__4 = *m + i__ * a_dim1; + q__2.r = stemp * a[i__4].r, q__2.i = stemp * a[ + i__4].i; + q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = *m + i__ * a_dim1; + i__4 = *m + i__ * a_dim1; + q__2.r = ctemp * a[i__4].r, q__2.i = ctemp * a[ + i__4].i; + q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L90: */ + } + } +/* L100: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = j + i__ * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = j + i__ * a_dim1; + i__3 = *m + i__ * a_dim1; + q__2.r = stemp * a[i__3].r, q__2.i = stemp * a[ + i__3].i; + q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = *m + i__ * a_dim1; + i__3 = *m + i__ * a_dim1; + q__2.r = ctemp * a[i__3].r, q__2.i = ctemp * a[ + i__3].i; + q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L110: */ + } + } +/* L120: */ + } + } + } + } else if (lsame_(side, "R")) { + +/* Form A * P' */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = 
*n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + (j + 1) * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + (j + 1) * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__4 = i__ + j * a_dim1; + q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ + i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = i__ + j * a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__4 = i__ + j * a_dim1; + q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L130: */ + } + } +/* L140: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + (j + 1) * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + (j + 1) * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__3 = i__ + j * a_dim1; + q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ + i__3].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = i__ + j * a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__3 = i__ + j * a_dim1; + q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ + i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L150: */ + } + } +/* L160: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + j * a_dim1; + 
q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__4 = i__ + a_dim1; + q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[ + i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = i__ + a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__4 = i__ + a_dim1; + q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[ + i__4].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L170: */ + } + } +/* L180: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + j * a_dim1; + q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i; + i__3 = i__ + a_dim1; + q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[ + i__3].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = i__ + a_dim1; + q__2.r = stemp * temp.r, q__2.i = stemp * temp.i; + i__3 = i__ + a_dim1; + q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[ + i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L190: */ + } + } +/* L200: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + temp.r = a[i__3].r, temp.i = a[i__3].i; + i__3 = i__ + j * a_dim1; + i__4 = i__ + *n * a_dim1; + q__2.r = stemp * a[i__4].r, q__2.i = stemp * a[ + i__4].i; + q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; + i__3 = i__ + *n * a_dim1; + i__4 = i__ + *n * a_dim1; + q__2.r = ctemp * 
a[i__4].r, q__2.i = ctemp * a[ + i__4].i; + q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L210: */ + } + } +/* L220: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1.f || stemp != 0.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + temp.r = a[i__2].r, temp.i = a[i__2].i; + i__2 = i__ + j * a_dim1; + i__3 = i__ + *n * a_dim1; + q__2.r = stemp * a[i__3].r, q__2.i = stemp * a[ + i__3].i; + q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = i__ + *n * a_dim1; + i__3 = i__ + *n * a_dim1; + q__2.r = ctemp * a[i__3].r, q__2.i = ctemp * a[ + i__3].i; + q__3.r = stemp * temp.r, q__3.i = stemp * temp.i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - + q__3.i; + a[i__2].r = q__1.r, a[i__2].i = q__1.i; +/* L230: */ + } + } +/* L240: */ + } + } } } - if (sgn1 == sgn2) { - tn = *cs1; - *cs1 = -(*sn1); - *sn1 = tn; - } + return 0; -/* End of DLAEV2 */ +/* End of CLASR */ -} /* dlaev2_ */ +} /* clasr_ */ -/* Subroutine */ int dlahqr_(logical *wantt, logical *wantz, integer *n, - integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal - *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, - integer *ldz, integer *info) +/* Subroutine */ int classq_(integer *n, complex *x, integer *incx, real * + scale, real *sumsq) { /* System generated locals */ - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; - doublereal d__1, d__2; + integer i__1, i__2, i__3; + real r__1; /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); + double r_imag(complex *); /* Local variables */ - static integer i__, j, k, l, m; - static doublereal s, v[3]; - static integer i1, i2; - static doublereal t1, t2, t3, 
v1, v2, v3, h00, h10, h11, h12, h21, h22, - h33, h44; - static integer nh; - static doublereal cs; - static integer nr; - static doublereal sn; - static integer nz; - static doublereal ave, h33s, h44s; - static integer itn, its; - static doublereal ulp, sum, tst1, h43h34, disc, unfl, ovfl; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static doublereal work[1]; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dlanv2_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *), dlabad_( - doublereal *, doublereal *); - - extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, - integer *, doublereal *); - extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, - doublereal *); - static doublereal smlnum; + static integer ix; + static real temp1; /* @@ -13125,1485 +15895,2044 @@ L120: Purpose ======= - DLAHQR is an auxiliary routine called by DHSEQR to update the - eigenvalues and Schur decomposition already computed by DHSEQR, by - dealing with the Hessenberg submatrix in rows and columns ILO to IHI. - - Arguments - ========= + CLASSQ returns the values scl and ssq such that - WANTT (input) LOGICAL - = .TRUE. : the full Schur form T is required; - = .FALSE.: only eigenvalues are required. + ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, - WANTZ (input) LOGICAL - = .TRUE. : the matrix of Schur vectors Z is required; - = .FALSE.: Schur vectors are not required. + where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is + assumed to be at least unity and the value of ssq will then satisfy - N (input) INTEGER - The order of the matrix H. N >= 0. + 1.0 .le. ssq .le. ( sumsq + 2*n ). 
- ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper quasi-triangular in - rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless - ILO = 1). DLAHQR works primarily with the Hessenberg - submatrix in rows and columns ILO to IHI, but applies - transformations to all of H if WANTT is .TRUE.. - 1 <= ILO <= max(1,IHI); IHI <= N. + scale is assumed to be non-negative and scl returns the value - H (input/output) DOUBLE PRECISION array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if WANTT is .TRUE., H is upper quasi-triangular in - rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in - standard form. If WANTT is .FALSE., the contents of H are - unspecified on exit. + scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), + i - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). + scale and sumsq must be supplied in SCALE and SUMSQ respectively. + SCALE and SUMSQ are overwritten by scl and ssq respectively. - WR (output) DOUBLE PRECISION array, dimension (N) - WI (output) DOUBLE PRECISION array, dimension (N) - The real and imaginary parts, respectively, of the computed - eigenvalues ILO to IHI are stored in the corresponding - elements of WR and WI. If two eigenvalues are computed as a - complex conjugate pair, they are stored in consecutive - elements of WR and WI, say the i-th and (i+1)th, with - WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the - eigenvalues are stored in the same order as on the diagonal - of the Schur form returned in H, with WR(i) = H(i,i), and, if - H(i:i+1,i:i+1) is a 2-by-2 diagonal block, - WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). + The routine makes only one pass through the vector X. - ILOZ (input) INTEGER - IHIZ (input) INTEGER - Specify the rows of Z to which transformations must be - applied if WANTZ is .TRUE.. - 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. 
+ Arguments + ========= - Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) - If WANTZ is .TRUE., on entry Z must contain the current - matrix Z of transformations accumulated by DHSEQR, and on - exit Z has been updated; transformations are applied only to - the submatrix Z(ILOZ:IHIZ,ILO:IHI). - If WANTZ is .FALSE., Z is not referenced. + N (input) INTEGER + The number of elements to be used from the vector X. - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= max(1,N). + X (input) COMPLEX array, dimension (N) + The vector x as described above. + x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. - INFO (output) INTEGER - = 0: successful exit - > 0: DLAHQR failed to compute all the eigenvalues ILO to IHI - in a total of 30*(IHI-ILO+1) iterations; if INFO = i, - elements i+1:ihi of WR and WI contain those eigenvalues - which have been successfully computed. + INCX (input) INTEGER + The increment between successive values of the vector X. + INCX > 0. - Further Details - =============== + SCALE (input/output) REAL + On entry, the value scale in the equation above. + On exit, SCALE is overwritten with the value scl . - 2-96 Based on modifications by - David Day, Sandia National Laboratory, USA + SUMSQ (input/output) REAL + On entry, the value sumsq in the equation above. + On exit, SUMSQ is overwritten with the value ssq . - ===================================================================== + ===================================================================== */ /* Parameter adjustments */ - h_dim1 = *ldh; - h_offset = 1 + h_dim1; - h__ -= h_offset; - --wr; - --wi; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; + --x; /* Function Body */ - *info = 0; + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? 
ix >= i__1 : ix <= i__1; ix += i__2) { + i__3 = ix; + if (x[i__3].r != 0.f) { + i__3 = ix; + temp1 = (r__1 = x[i__3].r, dabs(r__1)); + if (*scale < temp1) { +/* Computing 2nd power */ + r__1 = *scale / temp1; + *sumsq = *sumsq * (r__1 * r__1) + 1; + *scale = temp1; + } else { +/* Computing 2nd power */ + r__1 = temp1 / *scale; + *sumsq += r__1 * r__1; + } + } + if (r_imag(&x[ix]) != 0.f) { + temp1 = (r__1 = r_imag(&x[ix]), dabs(r__1)); + if (*scale < temp1) { +/* Computing 2nd power */ + r__1 = *scale / temp1; + *sumsq = *sumsq * (r__1 * r__1) + 1; + *scale = temp1; + } else { +/* Computing 2nd power */ + r__1 = temp1 / *scale; + *sumsq += r__1 * r__1; + } + } +/* L10: */ + } + } -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - wr[*ilo] = h__[*ilo + *ilo * h_dim1]; - wi[*ilo] = 0.; - return 0; - } - - nh = *ihi - *ilo + 1; - nz = *ihiz - *iloz + 1; - -/* - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ + return 0; - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (nh / ulp); +/* End of CLASSQ */ -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. -*/ +} /* classq_ */ - if (*wantt) { - i1 = 1; - i2 = *n; - } +/* Subroutine */ int claswp_(integer *n, complex *a, integer *lda, integer * + k1, integer *k2, integer *ipiv, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; -/* ITN is the total number of QR iterations allowed. */ + /* Local variables */ + static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + static complex temp; - itn = nh * 30; /* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of 1 or 2. 
Each iteration of the loop works - with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO or - H(L,L-1) is negligible so that the matrix splits. -*/ + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 - i__ = *ihi; -L10: - l = *ilo; - if (i__ < *ilo) { - goto L150; - } -/* - Perform QR iterations on rows and columns ILO to I until a - submatrix of order 1 or 2 splits off at the bottom because a - subdiagonal element has become negligible. -*/ + Purpose + ======= - i__1 = itn; - for (its = 0; its <= i__1; ++its) { + CLASWP performs a series of row interchanges on the matrix A. + One row interchange is initiated for each of rows K1 through K2 of A. -/* Look for a single small subdiagonal element. */ + Arguments + ========= - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = - h__[k + k * h_dim1], abs(d__2)); - if (tst1 == 0.) { - i__3 = i__ - l + 1; - tst1 = dlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work); - } -/* Computing MAX */ - d__2 = ulp * tst1; - if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, - smlnum)) { - goto L30; - } -/* L20: */ - } -L30: - l = k; - if (l > *ilo) { + N (input) INTEGER + The number of columns of the matrix A. -/* H(L,L-1) is negligible */ + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the matrix of column dimension N to which the row + interchanges will be applied. + On exit, the permuted matrix. - h__[l + (l - 1) * h_dim1] = 0.; - } + LDA (input) INTEGER + The leading dimension of the array A. -/* Exit from loop if a submatrix of order 1 or 2 has split off. */ + K1 (input) INTEGER + The first element of IPIV for which a row interchange will + be done. 
- if (l >= i__ - 1) { - goto L140; - } + K2 (input) INTEGER + The last element of IPIV for which a row interchange will + be done. -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ + IPIV (input) INTEGER array, dimension (M*abs(INCX)) + The vector of pivot indices. Only the elements in positions + K1 through K2 of IPIV are accessed. + IPIV(K) = L implies rows K and L are to be interchanged. - if (! (*wantt)) { - i1 = l; - i2 = i__; - } + INCX (input) INTEGER + The increment between successive values of IPIV. If IPIV + is negative, the pivots are applied in reverse order. - if (its == 10 || its == 20) { + Further Details + =============== -/* Exceptional shift. */ + Modified by + R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA - s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + (d__2 = - h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); - h44 = s * .75 + h__[i__ + i__ * h_dim1]; - h33 = h44; - h43h34 = s * -.4375 * s; - } else { + ===================================================================== -/* - Prepare to use Francis' double shift - (i.e. 2nd degree generalized Rayleigh quotient) + + Interchange row I with row IPIV(I) for each of rows K1 through K2. */ - h44 = h__[i__ + i__ * h_dim1]; - h33 = h__[i__ - 1 + (i__ - 1) * h_dim1]; - h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * - h_dim1]; - s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) * - h_dim1]; - disc = (h33 - h44) * .5; - disc = disc * disc + h43h34; - if (disc > 0.) 
{ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; -/* Real roots: use Wilkinson's shift twice */ + /* Function Body */ + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = (1 - *k2) * *incx + 1; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } - disc = sqrt(disc); - ave = (h33 + h44) * .5; - if (abs(h33) - abs(h44) > 0.) { - h33 = h33 * h44 - h43h34; - h44 = h33 / (d_sign(&disc, &ave) + ave); - } else { - h44 = d_sign(&disc, &ave) + ave; + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) + { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + i__5 = i__ + k * a_dim1; + temp.r = a[i__5].r, temp.i = a[i__5].i; + i__5 = i__ + k * a_dim1; + i__6 = ip + k * a_dim1; + a[i__5].r = a[i__6].r, a[i__5].i = a[i__6].i; + i__5 = ip + k * a_dim1; + a[i__5].r = temp.r, a[i__5].i = temp.i; +/* L10: */ + } } - h33 = h44; - h43h34 = 0.; + ix += *incx; +/* L20: */ } +/* L30: */ } - -/* Look for two consecutive small subdiagonal elements. */ - - i__2 = l; - for (m = i__ - 2; m >= i__2; --m) { -/* - Determine the effect of starting the double-shift QR - iteration at row M, and see if this would make H(M,M-1) - negligible. 
-*/ - - h11 = h__[m + m * h_dim1]; - h22 = h__[m + 1 + (m + 1) * h_dim1]; - h21 = h__[m + 1 + m * h_dim1]; - h12 = h__[m + (m + 1) * h_dim1]; - h44s = h44 - h11; - h33s = h33 - h11; - v1 = (h33s * h44s - h43h34) / h21 + h12; - v2 = h22 - h11 - h33s - h44s; - v3 = h__[m + 2 + (m + 1) * h_dim1]; - s = abs(v1) + abs(v2) + abs(v3); - v1 /= s; - v2 /= s; - v3 /= s; - v[0] = v1; - v[1] = v2; - v[2] = v3; - if (m == l) { - goto L50; - } - h00 = h__[m - 1 + (m - 1) * h_dim1]; - h10 = h__[m + (m - 1) * h_dim1]; - tst1 = abs(v1) * (abs(h00) + abs(h11) + abs(h22)); - if (abs(h10) * (abs(v2) + abs(v3)) <= ulp * tst1) { - goto L50; - } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + i__4 = i__ + k * a_dim1; + temp.r = a[i__4].r, temp.i = a[i__4].i; + i__4 = i__ + k * a_dim1; + i__5 = ip + k * a_dim1; + a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i; + i__4 = ip + k * a_dim1; + a[i__4].r = temp.r, a[i__4].i = temp.i; /* L40: */ + } + } + ix += *incx; +/* L50: */ } -L50: + } -/* Double-shift QR step */ + return 0; - i__2 = i__ - 1; - for (k = m; k <= i__2; ++k) { +/* End of CLASWP */ -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. +} /* claswp_ */ - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. 
+/* Subroutine */ int clatrd_(char *uplo, integer *n, integer *nb, complex *a, + integer *lda, real *e, complex *tau, complex *w, integer *ldw) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Local variables */ + static integer i__, iw; + static complex alpha; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *), chemv_(char *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, complex *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *), clarfg_(integer *, complex *, + complex *, integer *, complex *), clacgv_(integer *, complex *, + integer *); - Computing MIN -*/ - i__3 = 3, i__4 = i__ - k + 1; - nr = min(i__3,i__4); - if (k > m) { - dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - dlarfg_(&nr, v, &v[1], &c__1, &t1); - if (k > m) { - h__[k + (k - 1) * h_dim1] = v[0]; - h__[k + 1 + (k - 1) * h_dim1] = 0.; - if (k < i__ - 1) { - h__[k + 2 + (k - 1) * h_dim1] = 0.; - } - } else if (m > l) { - h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1]; - } - v2 = v[1]; - t2 = t1 * v2; - if (nr == 3) { - v3 = v[2]; - t3 = t1 * v3; /* - Apply G from the left to transform the rows of the matrix - in columns K to I2. -*/ + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 - i__3 = i2; - for (j = k; j <= i__3; ++j) { - sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] - + v3 * h__[k + 2 + j * h_dim1]; - h__[k + j * h_dim1] -= sum * t1; - h__[k + 1 + j * h_dim1] -= sum * t2; - h__[k + 2 + j * h_dim1] -= sum * t3; -/* L60: */ - } -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+3,I). + Purpose + ======= - Computing MIN -*/ - i__4 = k + 3; - i__3 = min(i__4,i__); - for (j = i1; j <= i__3; ++j) { - sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] - + v3 * h__[j + (k + 2) * h_dim1]; - h__[j + k * h_dim1] -= sum * t1; - h__[j + (k + 1) * h_dim1] -= sum * t2; - h__[j + (k + 2) * h_dim1] -= sum * t3; -/* L70: */ - } + CLATRD reduces NB rows and columns of a complex Hermitian matrix A to + Hermitian tridiagonal form by a unitary similarity + transformation Q' * A * Q, and returns the matrices V and W which are + needed to apply the transformation to the unreduced part of A. - if (*wantz) { + If UPLO = 'U', CLATRD reduces the last NB rows and columns of a + matrix, of which the upper triangle is supplied; + if UPLO = 'L', CLATRD reduces the first NB rows and columns of a + matrix, of which the lower triangle is supplied. -/* Accumulate transformations in the matrix Z */ + This is an auxiliary routine called by CHETRD. - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * - z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; - z__[j + k * z_dim1] -= sum * t1; - z__[j + (k + 1) * z_dim1] -= sum * t2; - z__[j + (k + 2) * z_dim1] -= sum * t3; -/* L80: */ - } - } - } else if (nr == 2) { + Arguments + ========= -/* - Apply G from the left to transform the rows of the matrix - in columns K to I2. 
-*/ + UPLO (input) CHARACTER + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular - i__3 = i2; - for (j = k; j <= i__3; ++j) { - sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; - h__[k + j * h_dim1] -= sum * t1; - h__[k + 1 + j * h_dim1] -= sum * t2; -/* L90: */ - } + N (input) INTEGER + The order of the matrix A. -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+3,I). -*/ + NB (input) INTEGER + The number of rows and columns to be reduced. - i__3 = i__; - for (j = i1; j <= i__3; ++j) { - sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] - ; - h__[j + k * h_dim1] -= sum * t1; - h__[j + (k + 1) * h_dim1] -= sum * t2; -/* L100: */ - } + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit: + if UPLO = 'U', the last NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements above the diagonal + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors; + if UPLO = 'L', the first NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements below the diagonal + with the array TAU, represent the unitary matrix Q as a + product of elementary reflectors. + See Further Details. - if (*wantz) { + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). 
-/* Accumulate transformations in the matrix Z */ + E (output) REAL array, dimension (N-1) + If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal + elements of the last NB columns of the reduced matrix; + if UPLO = 'L', E(1:nb) contains the subdiagonal elements of + the first NB columns of the reduced matrix. - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * - z_dim1]; - z__[j + k * z_dim1] -= sum * t1; - z__[j + (k + 1) * z_dim1] -= sum * t2; -/* L110: */ - } - } - } -/* L120: */ - } + TAU (output) COMPLEX array, dimension (N-1) + The scalar factors of the elementary reflectors, stored in + TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. + See Further Details. -/* L130: */ - } + W (output) COMPLEX array, dimension (LDW,NB) + The n-by-nb matrix W required to update the unreduced part + of A. -/* Failure to converge in remaining number of iterations */ + LDW (input) INTEGER + The leading dimension of the array W. LDW >= max(1,N). - *info = i__; - return 0; + Further Details + =============== -L140: + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors - if (l == i__) { + Q = H(n) H(n-1) . . . H(n-nb+1). -/* H(I,I-1) is negligible: one eigenvalue has converged. */ + Each H(i) has the form - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.; - } else if (l == i__ - 1) { + H(i) = I - tau * v * v' -/* - H(I-1,I-2) is negligible: a pair of eigenvalues have converged. + where tau is a complex scalar, and v is a complex vector with + v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), + and tau in TAU(i-1). - Transform the 2-by-2 submatrix to standard Schur form, - and compute and store the eigenvalues. 
-*/ + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors - dlanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * - h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * - h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, - &sn); + Q = H(1) H(2) . . . H(nb). - if (*wantt) { + Each H(i) has the form -/* Apply the transformation to the rest of H. */ + H(i) = I - tau * v * v' - if (i2 > i__) { - i__1 = i2 - i__; - drot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ - i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); - } - i__1 = i__ - i1 - 1; - drot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * - h_dim1], &c__1, &cs, &sn); - } - if (*wantz) { + where tau is a complex scalar, and v is a complex vector with + v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), + and tau in TAU(i). -/* Apply the transformation to Z. */ + The elements of the vectors v together form the n-by-nb matrix V + which is needed, with W, to apply the transformation to the unreduced + part of the matrix, using a Hermitian rank-2k update of the form: + A := A - V*W' - W*V'. - drot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + - i__ * z_dim1], &c__1, &cs, &sn); - } - } + The contents of A on exit are illustrated by the following examples + with n = 5 and nb = 2: -/* - Decrement number of remaining iterations, and return to start of - the main loop with new value of I. -*/ + if UPLO = 'U': if UPLO = 'L': - itn -= its; - i__ = l - 1; - goto L10; + ( a a a v4 v5 ) ( d ) + ( a a v4 v5 ) ( 1 d ) + ( a 1 v5 ) ( v1 1 a ) + ( d 1 ) ( v1 v2 a a ) + ( d ) ( v1 v2 a a a ) -L150: - return 0; + where d denotes a diagonal element of the reduced matrix, a denotes + an element of the original matrix that is unchanged, and vi denotes + an element of the vector defining H(i). 
-/* End of DLAHQR */ + ===================================================================== -} /* dlahqr_ */ -/* Subroutine */ int dlahrd_(integer *n, integer *k, integer *nb, doublereal * - a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, - doublereal *y, integer *ldy) -{ - /* System generated locals */ - integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, - i__3; - doublereal d__1; + Quick return if possible +*/ - /* Local variables */ - static integer i__; - static doublereal ei; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dgemv_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dcopy_(integer *, doublereal *, - integer *, doublereal *, integer *), daxpy_(integer *, doublereal - *, doublereal *, integer *, doublereal *, integer *), dtrmv_(char - *, char *, char *, integer *, doublereal *, integer *, doublereal - *, integer *), dlarfg_(integer *, - doublereal *, doublereal *, integer *, doublereal *); + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --e; + --tau; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + + if (lsame_(uplo, "U")) { + +/* Reduce last NB columns of upper triangle */ + + i__1 = *n - *nb + 1; + for (i__ = *n; i__ >= i__1; --i__) { + iw = i__ - *n + *nb; + if (i__ < *n) { + +/* Update A(1:i,i) */ + + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = *n - i__; + clacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__, &i__2, &q__1, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & + c_b56, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + clacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); + i__2 = *n - 
i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__, &i__2, &q__1, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b56, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + } + if (i__ > 1) { + +/* + Generate elementary reflector H(i) to annihilate + A(1:i-2,i) +*/ + + i__2 = i__ - 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = i__ - 1; + clarfg_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &tau[i__ + - 1]); + i__2 = i__ - 1; + e[i__2] = alpha.r; + i__2 = i__ - 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute W(1:i-1,i) */ + + i__2 = i__ - 1; + chemv_("Upper", &i__2, &c_b56, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b55, &w[iw * w_dim1 + 1], & + c__1); + if (i__ < *n) { + i__2 = i__ - 1; + i__3 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &w[( + iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], + &c__1, &c_b55, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b56, &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[( + i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], + &c__1, &c_b55, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &w[(iw + 1) * + w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b56, &w[iw * w_dim1 + 1], &c__1); + } + i__2 = i__ - 1; + cscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + q__3.r = -.5f, q__3.i = -0.f; + i__2 = i__ - 1; + 
q__2.r = q__3.r * tau[i__2].r - q__3.i * tau[i__2].i, q__2.i = + q__3.r * tau[i__2].i + q__3.i * tau[i__2].r; + i__3 = i__ - 1; + cdotc_(&q__4, &i__3, &w[iw * w_dim1 + 1], &c__1, &a[i__ * + a_dim1 + 1], &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + i__2 = i__ - 1; + caxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + w_dim1 + 1], &c__1); + } + +/* L10: */ + } + } else { + +/* Reduce first NB columns of lower triangle */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:n,i) */ + + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = i__ - 1; + clacgv_(&i__2, &w[i__ + w_dim1], ldw); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda, + &w[i__ + w_dim1], ldw, &c_b56, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &w[i__ + w_dim1], ldw); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + w_dim1], ldw, + &a[i__ + a_dim1], lda, &c_b56, &a[i__ + i__ * a_dim1], & + c__1); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = i__ + i__ * a_dim1; + i__3 = i__ + i__ * a_dim1; + r__1 = a[i__3].r; + a[i__2].r = r__1, a[i__2].i = 0.f; + if (i__ < *n) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:n,i) +*/ + + i__2 = i__ + 1 + i__ * a_dim1; + alpha.r = a[i__2].r, alpha.i = a[i__2].i; + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, + &tau[i__]); + i__2 = i__; + e[i__2] = alpha.r; + i__2 = i__ + 1 + i__ * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + +/* Compute W(i+1:n,i) */ + + i__2 = *n - i__; + chemv_("Lower", &i__2, &c_b56, &a[i__ + 1 
+ (i__ + 1) * + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b55, &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &w[i__ + + 1 + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b55, &w[i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b56, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b55, &w[i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b56, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + cscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + q__3.r = -.5f, q__3.i = -0.f; + i__2 = i__; + q__2.r = q__3.r * tau[i__2].r - q__3.i * tau[i__2].i, q__2.i = + q__3.r * tau[i__2].i + q__3.i * tau[i__2].r; + i__3 = *n - i__; + cdotc_(&q__4, &i__3, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * + q__4.i + q__2.i * q__4.r; + alpha.r = q__1.r, alpha.i = q__1.i; + i__2 = *n - i__; + caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + } + +/* L20: */ + } + } + + return 0; + +/* End of CLATRD */ + +} /* clatrd_ */ + +/* Subroutine */ int clatrs_(char *uplo, char *trans, char *diag, char * + normin, integer *n, complex *a, integer *lda, complex *x, real *scale, + real *cnorm, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4; + complex q__1, q__2, q__3, q__4; + + /* Builtin functions */ + 
double r_imag(complex *); + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j; + static real xj, rec, tjj; + static integer jinc; + static real xbnd; + static integer imax; + static real tmax; + static complex tjjs; + static real xmax, grow; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static real tscal; + static complex uscal; + static integer jlast; + extern /* Complex */ VOID cdotu_(complex *, integer *, complex *, integer + *, complex *, integer *); + static complex csumj; + extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, + integer *, complex *, integer *); + static logical upper; + extern /* Subroutine */ int ctrsv_(char *, char *, char *, integer *, + complex *, integer *, complex *, integer *), slabad_(real *, real *); + extern integer icamax_(integer *, complex *, integer *); + extern /* Complex */ VOID cladiv_(complex *, complex *, complex *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *); + static real bignum; + extern integer isamax_(integer *, real *, integer *); + extern doublereal scasum_(integer *, complex *, integer *); + static logical notran; + static integer jfirst; + static real smlnum; + static logical nounit; /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + June 30, 1992 Purpose ======= - DLAHRD reduces the first NB columns of a real general n-by-(n-k+1) - matrix A so that elements below the k-th subdiagonal are zero. The - reduction is performed by an orthogonal similarity transformation - Q' * A * Q. 
The routine returns the matrices V and T which determine - Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. + CLATRS solves one of the triangular systems - This is an auxiliary routine called by DGEHRD. + A * x = s*b, A**T * x = s*b, or A**H * x = s*b, + + with scaling to prevent overflow. Here A is an upper or lower + triangular matrix, A**T denotes the transpose of A, A**H denotes the + conjugate transpose of A, x and b are n-element vectors, and s is a + scaling factor, usually less than or equal to 1, chosen so that the + components of x will be less than the overflow threshold. If the + unscaled problem will not cause overflow, the Level 2 BLAS routine + CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), + then s is set to 0 and a non-trivial solution to A*x = 0 is returned. Arguments ========= - N (input) INTEGER - The order of the matrix A. + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular - K (input) INTEGER - The offset for the reduction. Elements below the k-th - subdiagonal in the first NB columns are reduced to zero. + TRANS (input) CHARACTER*1 + Specifies the operation applied to A. + = 'N': Solve A * x = s*b (No transpose) + = 'T': Solve A**T * x = s*b (Transpose) + = 'C': Solve A**H * x = s*b (Conjugate transpose) + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + NORMIN (input) CHARACTER*1 + Specifies whether CNORM has been set or not. + = 'Y': CNORM contains the column norms on entry + = 'N': CNORM is not set on entry. On exit, the norms will + be computed and stored in CNORM. - NB (input) INTEGER - The number of columns to be reduced. + N (input) INTEGER + The order of the matrix A. N >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1) - On entry, the n-by-(n-k+1) general matrix A. 
- On exit, the elements on and above the k-th subdiagonal in - the first NB columns are overwritten with the corresponding - elements of the reduced matrix; the elements below the k-th - subdiagonal, with the array TAU, represent the matrix Q as a - product of elementary reflectors. The other columns of A are - unchanged. See Further Details. + A (input) COMPLEX array, dimension (LDA,N) + The triangular matrix A. If UPLO = 'U', the leading n by n + upper triangular part of the array A contains the upper + triangular matrix, and the strictly lower triangular part of + A is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of the array A contains the lower triangular + matrix, and the strictly upper triangular part of A is not + referenced. If DIAG = 'U', the diagonal elements of A are + also not referenced and are assumed to be 1. LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(1,N). + The leading dimension of the array A. LDA >= max (1,N). - TAU (output) DOUBLE PRECISION array, dimension (NB) - The scalar factors of the elementary reflectors. See Further - Details. + X (input/output) COMPLEX array, dimension (N) + On entry, the right hand side b of the triangular system. + On exit, X is overwritten by the solution vector x. - T (output) DOUBLE PRECISION array, dimension (LDT,NB) - The upper triangular matrix T. + SCALE (output) REAL + The scaling factor s for the triangular system + A * x = s*b, A**T * x = s*b, or A**H * x = s*b. + If SCALE = 0, the matrix A is singular or badly scaled, and + the vector x is an exact or approximate solution to A*x = 0. - LDT (input) INTEGER - The leading dimension of the array T. LDT >= NB. + CNORM (input or output) REAL array, dimension (N) - Y (output) DOUBLE PRECISION array, dimension (LDY,NB) - The n-by-nb matrix Y. + If NORMIN = 'Y', CNORM is an input argument and CNORM(j) + contains the norm of the off-diagonal part of the j-th column + of A. 
If TRANS = 'N', CNORM(j) must be greater than or equal + to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) + must be greater than or equal to the 1-norm. - LDY (input) INTEGER - The leading dimension of the array Y. LDY >= N. + If NORMIN = 'N', CNORM is an output argument and CNORM(j) + returns the 1-norm of the offdiagonal part of the j-th column + of A. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value Further Details - =============== + ======= ======= - The matrix Q is represented as a product of nb elementary reflectors + A rough bound on x is computed; if that is less than overflow, CTRSV + is called, otherwise, specific code is used which checks for possible + overflow or divide-by-zero at every operation. - Q = H(1) H(2) . . . H(nb). + A columnwise scheme is used for solving A*x = b. The basic algorithm + if A is lower triangular is - Each H(i) has the form + x[1:n] := b[1:n] + for j = 1, ..., n + x(j) := x(j) / A(j,j) + x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] + end - H(i) = I - tau * v * v' + Define bounds on the components of x after j iterations of the loop: + M(j) = bound on x[1:j] + G(j) = bound on x[j+1:n] + Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. - where tau is a real scalar, and v is a real vector with - v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in - A(i+k+1:n,i), and tau in TAU(i). + Then for iteration j+1 we have + M(j+1) <= G(j) / | A(j+1,j+1) | + G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | + <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) - The elements of the vectors v together form the (n-k+1)-by-nb matrix - V which is needed, with T and Y, to apply the transformation to the - unreduced part of the matrix, using an update of the form: - A := (I - V*T*V') * (A - Y*V'). + where CNORM(j+1) is greater than or equal to the infinity-norm of + column j+1 of A, not counting the diagonal. 
Hence - The contents of A on exit are illustrated by the following example - with n = 7, k = 3 and nb = 2: + G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) + 1<=i<=j + and - ( a h a a a ) - ( a h a a a ) - ( a h a a a ) - ( h h a a a ) - ( v1 h a a a ) - ( v1 v2 a a a ) - ( v1 v2 a a a ) + |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) + 1<=i< j - where a denotes an element of the original matrix A, h denotes a - modified element of the upper Hessenberg matrix H, and vi denotes an - element of the vector defining H(i). + Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTRSV if the + reciprocal of the largest M(j), j=1,..,n, is larger than + max(underflow, 1/overflow). - ===================================================================== + The bound on x(j) is also used to determine when a step in the + columnwise method can be performed without fear of overflow. If + the computed bound is greater than a large constant, x is scaled to + prevent overflow, but if the bound overflows, x is set to 0, x(j) to + 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. + Similarly, a row-wise scheme is used to solve A**T *x = b or + A**H *x = b. The basic algorithm for A upper triangular is - Quick return if possible + for j = 1, ..., n + x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) + end + + We simultaneously compute two bounds + G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j + M(j) = bound on x(i), 1<=i<=j + + The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we + add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. + Then the bound on x(j) is + + M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | + + <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) + 1<=i<=j + + and we can safely call CTRSV if 1/M(n) and 1/G(n) are both greater + than max(underflow, 1/overflow). 
+ + ===================================================================== */ + /* Parameter adjustments */ - --tau; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - y_dim1 = *ldy; - y_offset = 1 + y_dim1; - y -= y_offset; + --x; + --cnorm; /* Function Body */ - if (*n <= 1) { + *info = 0; + upper = lsame_(uplo, "U"); + notran = lsame_(trans, "N"); + nounit = lsame_(diag, "N"); + +/* Test the input parameters. */ + + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T") && ! + lsame_(trans, "C")) { + *info = -2; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -3; + } else if (! lsame_(normin, "Y") && ! lsame_(normin, + "N")) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLATRS", &i__1); return 0; } - i__1 = *nb; - for (i__ = 1; i__ <= i__1; ++i__) { - if (i__ > 1) { +/* Quick return if possible */ -/* - Update A(1:n,i) + if (*n == 0) { + return 0; + } - Compute i-th column of A - Y * V' -*/ +/* Determine machine dependent parameters to control overflow. */ - i__2 = i__ - 1; - dgemv_("No transpose", n, &i__2, &c_b151, &y[y_offset], ldy, &a[* - k + i__ - 1 + a_dim1], lda, &c_b15, &a[i__ * a_dim1 + 1], - &c__1); + smlnum = slamch_("Safe minimum"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum /= slamch_("Precision"); + bignum = 1.f / smlnum; + *scale = 1.f; -/* - Apply I - V * T' * V' to this column (call it b) from the - left, using the last column of T as workspace + if (lsame_(normin, "N")) { - Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) - ( V2 ) ( b2 ) +/* Compute the 1-norm of each column, not including the diagonal. */ - where V1 is unit lower triangular + if (upper) { - w := V1' * b1 -*/ +/* A is upper triangular. 
*/ - i__2 = i__ - 1; - dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + - 1], &c__1); - i__2 = i__ - 1; - dtrmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], - lda, &t[*nb * t_dim1 + 1], &c__1); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + cnorm[j] = scasum_(&i__2, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { -/* w := w + V2'*b2 */ +/* A is lower triangular. */ - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b15, &t[*nb * - t_dim1 + 1], &c__1); + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + cnorm[j] = scasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1); +/* L20: */ + } + cnorm[*n] = 0.f; + } + } -/* w := T'*w */ +/* + Scale the column norms by TSCAL if the maximum element in CNORM is + greater than BIGNUM/2. +*/ - i__2 = i__ - 1; - dtrmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, - &t[*nb * t_dim1 + 1], &c__1); + imax = isamax_(n, &cnorm[1], &c__1); + tmax = cnorm[imax]; + if (tmax <= bignum * .5f) { + tscal = 1.f; + } else { + tscal = .5f / (smlnum * tmax); + sscal_(n, &tscal, &cnorm[1], &c__1); + } -/* b2 := b2 - V2*w */ +/* + Compute a bound on the computed solution vector to see if the + Level 2 BLAS routine CTRSV can be used. 
+*/ - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[*k + i__ + - a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b15, &a[*k - + i__ + i__ * a_dim1], &c__1); + xmax = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = j; + r__3 = xmax, r__4 = (r__1 = x[i__2].r / 2.f, dabs(r__1)) + (r__2 = + r_imag(&x[j]) / 2.f, dabs(r__2)); + xmax = dmax(r__3,r__4); +/* L30: */ + } + xbnd = xmax; -/* b1 := b1 - V1*w */ + if (notran) { - i__2 = i__ - 1; - dtrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] - , lda, &t[*nb * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - daxpy_(&i__2, &c_b151, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + - i__ * a_dim1], &c__1); +/* Compute the growth in A * x = b. */ - a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; + if (upper) { + jfirst = *n; + jlast = 1; + jinc = -1; + } else { + jfirst = 1; + jlast = *n; + jinc = 1; + } + + if (tscal != 1.f) { + grow = 0.f; + goto L60; } + if (nounit) { + /* - Generate the elementary reflector H(i) to annihilate - A(k+i+1:n,i) -*/ + A is non-unit triangular. - i__2 = *n - *k - i__ + 1; -/* Computing MIN */ - i__3 = *k + i__ + 1; - dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * - a_dim1], &c__1, &tau[i__]); - ei = a[*k + i__ + i__ * a_dim1]; - a[*k + i__ + i__ * a_dim1] = 1.; + Compute GROW = 1/G(j) and XBND = 1/M(j). + Initially, G(0) = max{x(i), i=1,...,n}. +*/ -/* Compute Y(1:n,i) */ + grow = .5f / dmax(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? 
j >= i__1 : j <= i__1; j += i__2) { - i__2 = *n - *k - i__ + 1; - dgemv_("No transpose", n, &i__2, &c_b15, &a[(i__ + 1) * a_dim1 + 1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * - y_dim1 + 1], &c__1); - i__2 = *n - *k - i__ + 1; - i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], lda, - &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &t[i__ * t_dim1 + - 1], &c__1); - i__2 = i__ - 1; - dgemv_("No transpose", n, &i__2, &c_b151, &y[y_offset], ldy, &t[i__ * - t_dim1 + 1], &c__1, &c_b15, &y[i__ * y_dim1 + 1], &c__1); - dscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); +/* Exit the loop if the growth factor is too small. */ -/* Compute T(1:i,i) */ + if (grow <= smlnum) { + goto L60; + } - i__2 = i__ - 1; - d__1 = -tau[i__]; - dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, - &t[i__ * t_dim1 + 1], &c__1) - ; - t[i__ + i__ * t_dim1] = tau[i__]; + i__3 = j + j * a_dim1; + tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); -/* L10: */ - } - a[*k + *nb + *nb * a_dim1] = ei; + if (tjj >= smlnum) { - return 0; +/* + M(j) = G(j-1) / abs(A(j,j)) -/* End of DLAHRD */ + Computing MIN +*/ + r__1 = xbnd, r__2 = dmin(1.f,tjj) * grow; + xbnd = dmin(r__1,r__2); + } else { -} /* dlahrd_ */ +/* M(j) could overflow, set XBND to 0. 
*/ -/* Subroutine */ int dlaln2_(logical *ltrans, integer *na, integer *nw, - doublereal *smin, doublereal *ca, doublereal *a, integer *lda, - doublereal *d1, doublereal *d2, doublereal *b, integer *ldb, - doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, - doublereal *scale, doublereal *xnorm, integer *info) -{ - /* Initialized data */ + xbnd = 0.f; + } - static logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; - static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; - static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, - 4,3,2,1 }; + if (tjj + cnorm[j] >= smlnum) { - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; - doublereal d__1, d__2, d__3, d__4, d__5, d__6; - static doublereal equiv_0[4], equiv_1[4]; +/* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ - /* Local variables */ - static integer j; -#define ci (equiv_0) -#define cr (equiv_1) - static doublereal bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, - cr21, cr22, li21, csi, ui11, lr21, ui12, ui22; -#define civ (equiv_0) - static doublereal csr, ur11, ur12, ur22; -#define crv (equiv_1) - static doublereal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; - static integer icmax; - static doublereal bnorm, cnorm, smini; + grow *= tjj / (tjj + cnorm[j]); + } else { - extern /* Subroutine */ int dladiv_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *); - static doublereal bignum, smlnum; +/* G(j) could overflow, set GROW to 0. */ + grow = 0.f; + } +/* L40: */ + } + grow = xbnd; + } else { /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + A is unit triangular. + Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. 
- Purpose - ======= + Computing MIN +*/ + r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum); + grow = dmin(r__1,r__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - DLALN2 solves a system of the form (ca A - w D ) X = s B - or (ca A' - w D) X = s B with possible scaling ("s") and - perturbation of A. (A' means A-transpose.) +/* Exit the loop if the growth factor is too small. */ - A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA - real diagonal matrix, w is a real or complex value, and X and B are - NA x 1 matrices -- real if w is real, complex if w is complex. NA - may be 1 or 2. + if (grow <= smlnum) { + goto L60; + } - If w is complex, X and B are represented as NA x 2 matrices, - the first column of each being the real part and the second - being the imaginary part. +/* G(j) = G(j-1)*( 1 + CNORM(j) ) */ - "s" is a scaling factor (.LE. 1), computed by DLALN2, which is - so chosen that X can be computed without overflow. X is further - scaled if necessary to assure that norm(ca A - w D)*norm(X) is less - than overflow. + grow *= 1.f / (cnorm[j] + 1.f); +/* L50: */ + } + } +L60: - If both singular values of (ca A - w D) are less than SMIN, - SMIN*identity will be used instead of (ca A - w D). If only one - singular value is less than SMIN, one element of (ca A - w D) will be - perturbed enough to make the smallest singular value roughly SMIN. - If both singular values are at least SMIN, (ca A - w D) will not be - perturbed. In any case, the perturbation will be at most some small - multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values - are computed by infinity-norm approximations, and thus will only be - correct to a factor of 2 or so. + ; + } else { - Note: all input quantities are assumed to be smaller than overflow - by a reasonable factor. (See BIGNUM.) +/* Compute the growth in A**T * x = b or A**H * x = b. 
*/ - Arguments - ========== + if (upper) { + jfirst = 1; + jlast = *n; + jinc = 1; + } else { + jfirst = *n; + jlast = 1; + jinc = -1; + } - LTRANS (input) LOGICAL - =.TRUE.: A-transpose will be used. - =.FALSE.: A will be used (not transposed.) + if (tscal != 1.f) { + grow = 0.f; + goto L90; + } - NA (input) INTEGER - The size of the matrix A. It may (only) be 1 or 2. + if (nounit) { - NW (input) INTEGER - 1 if "w" is real, 2 if "w" is complex. It may only be 1 - or 2. +/* + A is non-unit triangular. - SMIN (input) DOUBLE PRECISION - The desired lower bound on the singular values of A. This - should be a safe distance away from underflow or overflow, - say, between (underflow/machine precision) and (machine - precision * overflow ). (See BIGNUM and ULP.) + Compute GROW = 1/G(j) and XBND = 1/M(j). + Initially, M(0) = max{x(i), i=1,...,n}. +*/ - CA (input) DOUBLE PRECISION - The coefficient c, which A is multiplied by. + grow = .5f / dmax(xbnd,smlnum); + xbnd = grow; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - A (input) DOUBLE PRECISION array, dimension (LDA,NA) - The NA x NA matrix A. +/* Exit the loop if the growth factor is too small. */ - LDA (input) INTEGER - The leading dimension of A. It must be at least NA. + if (grow <= smlnum) { + goto L90; + } - D1 (input) DOUBLE PRECISION - The 1,1 element in the diagonal matrix D. +/* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ - D2 (input) DOUBLE PRECISION - The 2,2 element in the diagonal matrix D. Not used if NW=1. + xj = cnorm[j] + 1.f; +/* Computing MIN */ + r__1 = grow, r__2 = xbnd / xj; + grow = dmin(r__1,r__2); - B (input) DOUBLE PRECISION array, dimension (LDB,NW) - The NA x NW matrix B (right-hand side). If NW=2 ("w" is - complex), column 1 contains the real part of B and column 2 - contains the imaginary part. 
+ i__3 = j + j * a_dim1; + tjjs.r = a[i__3].r, tjjs.i = a[i__3].i; + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); - LDB (input) INTEGER - The leading dimension of B. It must be at least NA. + if (tjj >= smlnum) { - WR (input) DOUBLE PRECISION - The real part of the scalar "w". +/* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ - WI (input) DOUBLE PRECISION - The imaginary part of the scalar "w". Not used if NW=1. + if (xj > tjj) { + xbnd *= tjj / xj; + } + } else { - X (output) DOUBLE PRECISION array, dimension (LDX,NW) - The NA x NW matrix X (unknowns), as computed by DLALN2. - If NW=2 ("w" is complex), on exit, column 1 will contain - the real part of X and column 2 will contain the imaginary - part. +/* M(j) could overflow, set XBND to 0. */ - LDX (input) INTEGER - The leading dimension of X. It must be at least NA. + xbnd = 0.f; + } +/* L70: */ + } + grow = dmin(grow,xbnd); + } else { - SCALE (output) DOUBLE PRECISION - The scale factor that B must be multiplied by to insure - that overflow does not occur when computing X. Thus, - (ca A - w D) X will be SCALE*B, not B (ignoring - perturbations of A.) It will be at most 1. +/* + A is unit triangular. - XNORM (output) DOUBLE PRECISION - The infinity-norm of X, when X is regarded as an NA x NW - real matrix. + Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. - INFO (output) INTEGER - An error flag. It will be set to zero if no error occurs, - a negative number if an argument is in error, or a positive - number if ca A - w D had to be perturbed. - The possible values are: - = 0: No error occurred, and (ca A - w D) did not have to be - perturbed. - = 1: (ca A - w D) had to be perturbed to make its smallest - (or only) singular value greater than SMIN. - NOTE: In the interests of speed, this routine does not - check the inputs for errors. 
+ Computing MIN +*/ + r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum); + grow = dmin(r__1,r__2); + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { - ===================================================================== +/* Exit the loop if the growth factor is too small. */ + + if (grow <= smlnum) { + goto L90; + } + +/* G(j) = ( 1 + CNORM(j) )*G(j-1) */ + + xj = cnorm[j] + 1.f; + grow /= xj; +/* L80: */ + } + } +L90: + ; + } + + if (grow * tscal > smlnum) { + +/* + Use the Level 2 BLAS solve if the reciprocal of the bound on + elements of X is not too small. */ - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - x_dim1 = *ldx; - x_offset = 1 + x_dim1; - x -= x_offset; + ctrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1); + } else { - /* Function Body */ +/* Use a Level 1 BLAS solve, scaling intermediate results. */ -/* Compute BIGNUM */ + if (xmax > bignum * .5f) { - smlnum = 2. * SAFEMINIMUM; - bignum = 1. / smlnum; - smini = max(*smin,smlnum); +/* + Scale X so that its components are less than or equal to + BIGNUM in absolute value. +*/ -/* Don't check for input errors */ + *scale = bignum * .5f / xmax; + csscal_(n, scale, &x[1], &c__1); + xmax = bignum; + } else { + xmax *= 2.f; + } - *info = 0; + if (notran) { -/* Standard Initializations */ +/* Solve A * x = b */ - *scale = 1.; + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - if (*na == 1) { +/* Compute x(j) = b(j) / A(j,j), scaling x if necessary. 
*/ -/* 1 x 1 (i.e., scalar) system C X = B */ + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), + dabs(r__2)); + if (nounit) { + i__3 = j + j * a_dim1; + q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3].i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + if (tscal == 1.f) { + goto L105; + } + } + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > smlnum) { - if (*nw == 1) { +/* abs(A(j,j)) > SMLNUM: */ -/* - Real 1x1 system. + if (tjj < 1.f) { + if (xj > tjj * bignum) { - C = ca A - w D -*/ +/* Scale x by 1/b(j). */ - csr = *ca * a[a_dim1 + 1] - *wr * *d1; - cnorm = abs(csr); + rec = 1.f / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] + ), dabs(r__2)); + } else if (tjj > 0.f) { -/* If | C | < SMINI, use C = SMINI */ +/* 0 < abs(A(j,j)) <= SMLNUM: */ - if (cnorm < smini) { - csr = smini; - cnorm = smini; - *info = 1; - } + if (xj > tjj * bignum) { -/* Check scaling for X = B / C */ +/* + Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM + to avoid overflow when dividing by A(j,j). +*/ - bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)); - if (cnorm < 1. && bnorm > 1.) { - if (bnorm > bignum * cnorm) { - *scale = 1. / bnorm; - } - } + rec = tjj * bignum / xj; + if (cnorm[j] > 1.f) { -/* Compute X */ +/* + Scale by 1/CNORM(j) to avoid overflow when + multiplying x(j) times column j. 
+*/ - x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; - *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); - } else { + rec /= cnorm[j]; + } + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] + ), dabs(r__2)); + } else { /* - Complex 1x1 system (w is complex) + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0, and compute a solution to A*x = 0. +*/ - C = ca A - w D + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0.f, x[i__4].i = 0.f; +/* L100: */ + } + i__3 = j; + x[i__3].r = 1.f, x[i__3].i = 0.f; + xj = 1.f; + *scale = 0.f; + xmax = 0.f; + } +L105: + +/* + Scale x if necessary to avoid overflow when adding a + multiple of column j of A. */ - csr = *ca * a[a_dim1 + 1] - *wr * *d1; - csi = -(*wi) * *d1; - cnorm = abs(csr) + abs(csi); + if (xj > 1.f) { + rec = 1.f / xj; + if (cnorm[j] > (bignum - xmax) * rec) { -/* If | C | < SMINI, use C = SMINI */ +/* Scale x by 1/(2*abs(x(j))). */ - if (cnorm < smini) { - csr = smini; - csi = 0.; - cnorm = smini; - *info = 1; - } + rec *= .5f; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + } + } else if (xj * cnorm[j] > bignum - xmax) { -/* Check scaling for X = B / C */ +/* Scale x by 1/2. */ - bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << - 1) + 1], abs(d__2)); - if (cnorm < 1. && bnorm > 1.) { - if (bnorm > bignum * cnorm) { - *scale = 1. 
/ bnorm; + csscal_(n, &c_b1794, &x[1], &c__1); + *scale *= .5f; } - } -/* Compute X */ + if (upper) { + if (j > 1) { - d__1 = *scale * b[b_dim1 + 1]; - d__2 = *scale * b[(b_dim1 << 1) + 1]; - dladiv_(&d__1, &d__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) - + 1]); - *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << - 1) + 1], abs(d__2)); - } +/* + Compute the update + x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +*/ - } else { + i__3 = j - 1; + i__4 = j; + q__2.r = -x[i__4].r, q__2.i = -x[i__4].i; + q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; + caxpy_(&i__3, &q__1, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + i__3 = j - 1; + i__ = icamax_(&i__3, &x[1], &c__1); + i__3 = i__; + xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = + r_imag(&x[i__]), dabs(r__2)); + } + } else { + if (j < *n) { /* - 2x2 System + Compute the update + x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +*/ - Compute the real part of C = ca A - w D (or ca A' - w D ) + i__3 = *n - j; + i__4 = j; + q__2.r = -x[i__4].r, q__2.i = -x[i__4].i; + q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; + caxpy_(&i__3, &q__1, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + i__3 = *n - j; + i__ = j + icamax_(&i__3, &x[j + 1], &c__1); + i__3 = i__; + xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = + r_imag(&x[i__]), dabs(r__2)); + } + } +/* L110: */ + } + + } else if (lsame_(trans, "T")) { + +/* Solve A**T * x = b */ + + i__2 = jlast; + i__1 = jinc; + for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Compute x(j) = b(j) - sum A(k,j)*x(k). 
+ k<>j */ - cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; - cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; - if (*ltrans) { - cr[2] = *ca * a[a_dim1 + 2]; - cr[1] = *ca * a[(a_dim1 << 1) + 1]; - } else { - cr[1] = *ca * a[a_dim1 + 2]; - cr[2] = *ca * a[(a_dim1 << 1) + 1]; - } + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), + dabs(r__2)); + uscal.r = tscal, uscal.i = 0.f; + rec = 1.f / dmax(xmax,1.f); + if (cnorm[j] > (bignum - xj) * rec) { - if (*nw == 1) { +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ + + rec *= .5f; + if (nounit) { + i__3 = j + j * a_dim1; + q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3] + .i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + } + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > 1.f) { /* - Real 2x2 system (w is real) + Divide by A(j,j) when scaling x if A(j,j) > 1. - Find the largest element in C + Computing MIN */ + r__1 = 1.f, r__2 = rec * tjj; + rec = dmin(r__1,r__2); + cladiv_(&q__1, &uscal, &tjjs); + uscal.r = q__1.r, uscal.i = q__1.i; + } + if (rec < 1.f) { + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } - cmax = 0.; - icmax = 0; + csumj.r = 0.f, csumj.i = 0.f; + if (uscal.r == 1.f && uscal.i == 0.f) { - for (j = 1; j <= 4; ++j) { - if ((d__1 = crv[j - 1], abs(d__1)) > cmax) { - cmax = (d__1 = crv[j - 1], abs(d__1)); - icmax = j; - } -/* L10: */ - } +/* + If the scaling needed for A in the dot product is 1, + call CDOTU to perform the dot product. +*/ -/* If norm(C) < SMINI, use SMINI*identity. 
*/ + if (upper) { + i__3 = j - 1; + cdotu_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + csumj.r = q__1.r, csumj.i = q__1.i; + } else if (j < *n) { + i__3 = *n - j; + cdotu_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + csumj.r = q__1.r, csumj.i = q__1.i; + } + } else { - if (cmax < smini) { -/* Computing MAX */ - d__3 = (d__1 = b[b_dim1 + 1], abs(d__1)), d__4 = (d__2 = b[ - b_dim1 + 2], abs(d__2)); - bnorm = max(d__3,d__4); - if (smini < 1. && bnorm > 1.) { - if (bnorm > bignum * smini) { - *scale = 1. / bnorm; +/* Otherwise, use in-line code for the dot product. */ + + if (upper) { + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * a_dim1; + q__3.r = a[i__4].r * uscal.r - a[i__4].i * + uscal.i, q__3.i = a[i__4].r * uscal.i + a[ + i__4].i * uscal.r; + i__5 = i__; + q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, + q__2.i = q__3.r * x[i__5].i + q__3.i * x[ + i__5].r; + q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + + q__2.i; + csumj.r = q__1.r, csumj.i = q__1.i; +/* L120: */ + } + } else if (j < *n) { + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__ + j * a_dim1; + q__3.r = a[i__4].r * uscal.r - a[i__4].i * + uscal.i, q__3.i = a[i__4].r * uscal.i + a[ + i__4].i * uscal.r; + i__5 = i__; + q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, + q__2.i = q__3.r * x[i__5].i + q__3.i * x[ + i__5].r; + q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + + q__2.i; + csumj.r = q__1.r, csumj.i = q__1.i; +/* L130: */ + } } } - temp = *scale / smini; - x[x_dim1 + 1] = temp * b[b_dim1 + 1]; - x[x_dim1 + 2] = temp * b[b_dim1 + 2]; - *xnorm = temp * bnorm; - *info = 1; - return 0; - } -/* Gaussian elimination with complete pivoting. */ + q__1.r = tscal, q__1.i = 0.f; + if (uscal.r == q__1.r && uscal.i == q__1.i) { - ur11 = crv[icmax - 1]; - cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; - ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; - cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; - ur11r = 1. 
/ ur11; - lr21 = ur11r * cr21; - ur22 = cr22 - ur12 * lr21; +/* + Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) + was not used to scale the dotproduct. +*/ -/* If smaller pivot < SMINI, use SMINI */ + i__3 = j; + i__4 = j; + q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i - + csumj.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] + ), dabs(r__2)); + if (nounit) { + i__3 = j + j * a_dim1; + q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3] + .i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + if (tscal == 1.f) { + goto L145; + } + } - if (abs(ur22) < smini) { - ur22 = smini; - *info = 1; - } - if (rswap[icmax - 1]) { - br1 = b[b_dim1 + 2]; - br2 = b[b_dim1 + 1]; - } else { - br1 = b[b_dim1 + 1]; - br2 = b[b_dim1 + 2]; - } - br2 -= lr21 * br1; -/* Computing MAX */ - d__2 = (d__1 = br1 * (ur22 * ur11r), abs(d__1)), d__3 = abs(br2); - bbnd = max(d__2,d__3); - if (bbnd > 1. && abs(ur22) < 1.) { - if (bbnd >= bignum * abs(ur22)) { - *scale = 1. / bbnd; - } - } +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ - xr2 = br2 * *scale / ur22; - xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); - if (zswap[icmax - 1]) { - x[x_dim1 + 1] = xr2; - x[x_dim1 + 2] = xr1; - } else { - x[x_dim1 + 1] = xr1; - x[x_dim1 + 2] = xr2; - } -/* Computing MAX */ - d__1 = abs(xr1), d__2 = abs(xr2); - *xnorm = max(d__1,d__2); + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > smlnum) { -/* Further scaling if norm(A) norm(X) > overflow */ +/* abs(A(j,j)) > SMLNUM: */ - if (*xnorm > 1. && cmax > 1.) { - if (*xnorm > bignum / cmax) { - temp = cmax / bignum; - x[x_dim1 + 1] = temp * x[x_dim1 + 1]; - x[x_dim1 + 2] = temp * x[x_dim1 + 2]; - *xnorm = temp * *xnorm; - *scale = temp * *scale; + if (tjj < 1.f) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). 
*/ + + rec = 1.f / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } else if (tjj > 0.f) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0 and compute a solution to A**T *x = 0. +*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0.f, x[i__4].i = 0.f; +/* L140: */ + } + i__3 = j; + x[i__3].r = 1.f, x[i__3].i = 0.f; + *scale = 0.f; + xmax = 0.f; + } +L145: + ; + } else { + +/* + Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot + product has already been divided by 1/A(j,j). +*/ + + i__3 = j; + cladiv_(&q__2, &x[j], &tjjs); + q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; } +/* Computing MAX */ + i__3 = j; + r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = + r_imag(&x[j]), dabs(r__2)); + xmax = dmax(r__3,r__4); +/* L150: */ } + } else { -/* - Complex 2x2 system (w is complex) +/* Solve A**H * x = b */ - Find the largest element in C + i__1 = jlast; + i__2 = jinc; + for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Compute x(j) = b(j) - sum A(k,j)*x(k). 
+ k<>j */ - ci[0] = -(*wi) * *d1; - ci[1] = 0.; - ci[2] = 0.; - ci[3] = -(*wi) * *d2; - cmax = 0.; - icmax = 0; + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), + dabs(r__2)); + uscal.r = tscal, uscal.i = 0.f; + rec = 1.f / dmax(xmax,1.f); + if (cnorm[j] > (bignum - xj) * rec) { - for (j = 1; j <= 4; ++j) { - if ((d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs( - d__2)) > cmax) { - cmax = (d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1] - , abs(d__2)); - icmax = j; - } -/* L20: */ - } +/* If x(j) could overflow, scale x by 1/(2*XMAX). */ -/* If norm(C) < SMINI, use SMINI*identity. */ + rec *= .5f; + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + } + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > 1.f) { - if (cmax < smini) { -/* Computing MAX */ - d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 - << 1) + 1], abs(d__2)), d__6 = (d__3 = b[b_dim1 + 2], - abs(d__3)) + (d__4 = b[(b_dim1 << 1) + 2], abs(d__4)); - bnorm = max(d__5,d__6); - if (smini < 1. && bnorm > 1.) { - if (bnorm > bignum * smini) { - *scale = 1. / bnorm; +/* + Divide by A(j,j) when scaling x if A(j,j) > 1. + + Computing MIN +*/ + r__1 = 1.f, r__2 = rec * tjj; + rec = dmin(r__1,r__2); + cladiv_(&q__1, &uscal, &tjjs); + uscal.r = q__1.r, uscal.i = q__1.i; + } + if (rec < 1.f) { + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; } } - temp = *scale / smini; - x[x_dim1 + 1] = temp * b[b_dim1 + 1]; - x[x_dim1 + 2] = temp * b[b_dim1 + 2]; - x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; - x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; - *xnorm = temp * bnorm; - *info = 1; - return 0; - } - -/* Gaussian elimination with complete pivoting. 
*/ - ur11 = crv[icmax - 1]; - ui11 = civ[icmax - 1]; - cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; - ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; - ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; - ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; - cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; - ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; - if (icmax == 1 || icmax == 4) { + csumj.r = 0.f, csumj.i = 0.f; + if (uscal.r == 1.f && uscal.i == 0.f) { -/* Code when off-diagonals of pivoted C are real */ +/* + If the scaling needed for A in the dot product is 1, + call CDOTC to perform the dot product. +*/ - if (abs(ur11) > abs(ui11)) { - temp = ui11 / ur11; -/* Computing 2nd power */ - d__1 = temp; - ur11r = 1. / (ur11 * (d__1 * d__1 + 1.)); - ui11r = -temp * ur11r; + if (upper) { + i__3 = j - 1; + cdotc_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], + &c__1); + csumj.r = q__1.r, csumj.i = q__1.i; + } else if (j < *n) { + i__3 = *n - j; + cdotc_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, & + x[j + 1], &c__1); + csumj.r = q__1.r, csumj.i = q__1.i; + } } else { - temp = ur11 / ui11; -/* Computing 2nd power */ - d__1 = temp; - ui11r = -1. / (ui11 * (d__1 * d__1 + 1.)); - ur11r = -temp * ui11r; + +/* Otherwise, use in-line code for the dot product. 
*/ + + if (upper) { + i__3 = j - 1; + for (i__ = 1; i__ <= i__3; ++i__) { + r_cnjg(&q__4, &a[i__ + j * a_dim1]); + q__3.r = q__4.r * uscal.r - q__4.i * uscal.i, + q__3.i = q__4.r * uscal.i + q__4.i * + uscal.r; + i__4 = i__; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, + q__2.i = q__3.r * x[i__4].i + q__3.i * x[ + i__4].r; + q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + + q__2.i; + csumj.r = q__1.r, csumj.i = q__1.i; +/* L160: */ + } + } else if (j < *n) { + i__3 = *n; + for (i__ = j + 1; i__ <= i__3; ++i__) { + r_cnjg(&q__4, &a[i__ + j * a_dim1]); + q__3.r = q__4.r * uscal.r - q__4.i * uscal.i, + q__3.i = q__4.r * uscal.i + q__4.i * + uscal.r; + i__4 = i__; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, + q__2.i = q__3.r * x[i__4].i + q__3.i * x[ + i__4].r; + q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + + q__2.i; + csumj.r = q__1.r, csumj.i = q__1.i; +/* L170: */ + } + } } - lr21 = cr21 * ur11r; - li21 = cr21 * ui11r; - ur12s = ur12 * ur11r; - ui12s = ur12 * ui11r; - ur22 = cr22 - ur12 * lr21; - ui22 = ci22 - ur12 * li21; - } else { -/* Code when diagonals of pivoted C are real */ + q__1.r = tscal, q__1.i = 0.f; + if (uscal.r == q__1.r && uscal.i == q__1.i) { - ur11r = 1. / ur11; - ui11r = 0.; - lr21 = cr21 * ur11r; - li21 = ci21 * ur11r; - ur12s = ur12 * ur11r; - ui12s = ui12 * ur11r; - ur22 = cr22 - ur12 * lr21 + ui12 * li21; - ui22 = -ur12 * li21 - ui12 * lr21; - } - u22abs = abs(ur22) + abs(ui22); +/* + Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) + was not used to scale the dotproduct. 
+*/ -/* If smaller pivot < SMINI, use SMINI */ + i__3 = j; + i__4 = j; + q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i - + csumj.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + i__3 = j; + xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j] + ), dabs(r__2)); + if (nounit) { + r_cnjg(&q__2, &a[j + j * a_dim1]); + q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i; + tjjs.r = q__1.r, tjjs.i = q__1.i; + } else { + tjjs.r = tscal, tjjs.i = 0.f; + if (tscal == 1.f) { + goto L185; + } + } - if (u22abs < smini) { - ur22 = smini; - ui22 = 0.; - *info = 1; - } - if (rswap[icmax - 1]) { - br2 = b[b_dim1 + 1]; - br1 = b[b_dim1 + 2]; - bi2 = b[(b_dim1 << 1) + 1]; - bi1 = b[(b_dim1 << 1) + 2]; - } else { - br1 = b[b_dim1 + 1]; - br2 = b[b_dim1 + 2]; - bi1 = b[(b_dim1 << 1) + 1]; - bi2 = b[(b_dim1 << 1) + 2]; - } - br2 = br2 - lr21 * br1 + li21 * bi1; - bi2 = bi2 - li21 * br1 - lr21 * bi1; -/* Computing MAX */ - d__1 = (abs(br1) + abs(bi1)) * (u22abs * (abs(ur11r) + abs(ui11r)) - ), d__2 = abs(br2) + abs(bi2); - bbnd = max(d__1,d__2); - if (bbnd > 1. && u22abs < 1.) { - if (bbnd >= bignum * u22abs) { - *scale = 1. / bbnd; - br1 = *scale * br1; - bi1 = *scale * bi1; - br2 = *scale * br2; - bi2 = *scale * bi2; - } - } +/* Compute x(j) = x(j) / A(j,j), scaling if necessary. 
*/ - dladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); - xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; - xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; - if (zswap[icmax - 1]) { - x[x_dim1 + 1] = xr2; - x[x_dim1 + 2] = xr1; - x[(x_dim1 << 1) + 1] = xi2; - x[(x_dim1 << 1) + 2] = xi1; - } else { - x[x_dim1 + 1] = xr1; - x[x_dim1 + 2] = xr2; - x[(x_dim1 << 1) + 1] = xi1; - x[(x_dim1 << 1) + 2] = xi2; - } -/* Computing MAX */ - d__1 = abs(xr1) + abs(xi1), d__2 = abs(xr2) + abs(xi2); - *xnorm = max(d__1,d__2); + tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), + dabs(r__2)); + if (tjj > smlnum) { -/* Further scaling if norm(A) norm(X) > overflow */ +/* abs(A(j,j)) > SMLNUM: */ - if (*xnorm > 1. && cmax > 1.) { - if (*xnorm > bignum / cmax) { - temp = cmax / bignum; - x[x_dim1 + 1] = temp * x[x_dim1 + 1]; - x[x_dim1 + 2] = temp * x[x_dim1 + 2]; - x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; - x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; - *xnorm = temp * *xnorm; - *scale = temp * *scale; + if (tjj < 1.f) { + if (xj > tjj * bignum) { + +/* Scale X by 1/abs(x(j)). */ + + rec = 1.f / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } else if (tjj > 0.f) { + +/* 0 < abs(A(j,j)) <= SMLNUM: */ + + if (xj > tjj * bignum) { + +/* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ + + rec = tjj * bignum / xj; + csscal_(n, &rec, &x[1], &c__1); + *scale *= rec; + xmax *= rec; + } + i__3 = j; + cladiv_(&q__1, &x[j], &tjjs); + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } else { + +/* + A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and + scale = 0 and compute a solution to A**H *x = 0. 
+*/ + + i__3 = *n; + for (i__ = 1; i__ <= i__3; ++i__) { + i__4 = i__; + x[i__4].r = 0.f, x[i__4].i = 0.f; +/* L180: */ + } + i__3 = j; + x[i__3].r = 1.f, x[i__3].i = 0.f; + *scale = 0.f; + xmax = 0.f; + } +L185: + ; + } else { + +/* + Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot + product has already been divided by 1/A(j,j). +*/ + + i__3 = j; + cladiv_(&q__2, &x[j], &tjjs); + q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; } +/* Computing MAX */ + i__3 = j; + r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = + r_imag(&x[j]), dabs(r__2)); + xmax = dmax(r__3,r__4); +/* L190: */ } } + *scale /= tscal; } - return 0; +/* Scale the column norms by 1/TSCAL for return. */ -/* End of DLALN2 */ + if (tscal != 1.f) { + r__1 = 1.f / tscal; + sscal_(n, &r__1, &cnorm[1], &c__1); + } -} /* dlaln2_ */ + return 0; -#undef crv -#undef civ -#undef cr -#undef ci +/* End of CLATRS */ +} /* clatrs_ */ -/* Subroutine */ int dlals0_(integer *icompq, integer *nl, integer *nr, - integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal - *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, - integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * - poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * - k, doublereal *c__, doublereal *s, doublereal *work, integer *info) +/* Subroutine */ int clauu2_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) { /* System generated locals */ - integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, - difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, - poles_offset, i__1, i__2; - doublereal d__1; + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + complex q__1; /* Local variables */ - static integer i__, j, m, n; - static doublereal dj; - static integer nlp1; - static doublereal temp; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer 
*, doublereal *, doublereal *); - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + static integer i__; + static real aii; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + static logical upper; + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), + csscal_(integer *, real *, complex *, integer *), xerbla_(char *, integer *); - static doublereal diflj, difrj, dsigj; - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), dcopy_(integer *, - doublereal *, integer *, doublereal *, integer *); - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlacpy_(char *, integer *, integer - *, doublereal *, integer *, doublereal *, integer *), - xerbla_(char *, integer *); - static doublereal dsigjp; /* - -- LAPACK routine (version 3.0) -- + -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 + September 30, 1994 Purpose ======= - DLALS0 applies back the multiplying factors of either the left or the - right singular vector matrix of a diagonal matrix appended by a row - to the right hand side matrix B in solving the least squares problem - using the divide-and-conquer SVD approach. 
+ CLAUU2 computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. - For the left singular vector matrix, three types of orthogonal - matrices are involved: + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. - (1L) Givens rotations: the number of such rotations is GIVPTR; the - pairs of columns/rows they were applied to are stored in GIVCOL; - and the C- and S-values of these rotations are stored in GIVNUM. + This is the unblocked form of the algorithm, calling Level 2 BLAS. - (2L) Permutation. The (NL+1)-st row of B is to be moved to the first - row, and for J=2:N, PERM(J)-th row of B is to be moved to the - J-th row. + Arguments + ========= - (3L) The left singular vector matrix of the remaining matrix. + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular - For the right singular vector matrix, four types of orthogonal - matrices are involved: + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. - (1R) The right singular vector matrix of the remaining matrix. + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. - (2R) If SQRE = 1, one extra Givens rotation to generate the right - null space. + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - (3R) The inverse transformation of (2L). 
+ INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value - (4R) The inverse transformation of (1L). + ===================================================================== - Arguments - ========= - ICOMPQ (input) INTEGER - Specifies whether singular vectors are to be computed in - factored form: - = 0: Left singular vector matrix. - = 1: Right singular vector matrix. + Test the input parameters. +*/ - NL (input) INTEGER - The row dimension of the upper block. NL >= 1. + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; - NR (input) INTEGER - The row dimension of the lower block. NR >= 1. + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CLAUU2", &i__1); + return 0; + } - SQRE (input) INTEGER - = 0: the lower block is an NR-by-NR square matrix. - = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +/* Quick return if possible */ - The bidiagonal matrix has row dimension N = NL + NR + 1, - and column dimension M = N + SQRE. + if (*n == 0) { + return 0; + } - NRHS (input) INTEGER - The number of columns of B and BX. NRHS must be at least 1. + if (upper) { - B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) - On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. +/* Compute the product U * U'. */ - LDB (input) INTEGER - The leading dimension of B. LDB must be at least - max(1,MAX( M, N ) ). 
+ i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + cdotc_(&q__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &a[ + i__ + (i__ + 1) * a_dim1], lda); + r__1 = aii * aii + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + q__1.r = aii, q__1.i = 0.f; + cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + q__1, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } else { + csscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } +/* L10: */ + } - BX (workspace) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) + } else { - LDBX (input) INTEGER - The leading dimension of BX. +/* Compute the product L' * L. */ - PERM (input) INTEGER array, dimension ( N ) - The permutations (from deflation and sorting) applied - to the two blocks. + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + cdotc_(&q__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + r__1 = aii * aii + q__1.r; + a[i__2].r = r__1, a[i__2].i = 0.f; + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + i__3 = i__ - 1; + q__1.r = aii, q__1.i = 0.f; + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + q__1, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + clacgv_(&i__2, &a[i__ + a_dim1], lda); + } else { + csscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } +/* L20: */ + } + } - GIVPTR (input) INTEGER - The number of Givens rotations which took place in this - subproblem. 
+ return 0; - GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) - Each pair of numbers indicates a pair of rows/columns - involved in a Givens rotation. +/* End of CLAUU2 */ - LDGCOL (input) INTEGER - The leading dimension of GIVCOL, must be at least N. +} /* clauu2_ */ - GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - Each number indicates the C or S value used in the - corresponding Givens rotation. +/* Subroutine */ int clauum_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - LDGNUM (input) INTEGER - The leading dimension of arrays DIFR, POLES and - GIVNUM, must be at least K. + /* Local variables */ + static integer i__, ib, nb; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *), cherk_(char *, + char *, integer *, integer *, real *, complex *, integer *, real * + , complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + static logical upper; + extern /* Subroutine */ int clauu2_(char *, integer *, complex *, integer + *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); - POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) - On entry, POLES(1:K, 1) contains the new singular - values obtained from solving the secular equation, and - POLES(1:K, 2) is an array containing the poles in the secular - equation. - DIFL (input) DOUBLE PRECISION array, dimension ( K ). - On entry, DIFL(I) is the distance between I-th updated - (undeflated) singular value and the I-th (undeflated) old - singular value. 
+/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 - DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). - On entry, DIFR(I, 1) contains the distances between I-th - updated (undeflated) singular value and the I+1-th - (undeflated) old singular value. And DIFR(I, 2) is the - normalizing factor for the I-th right singular vector. - Z (input) DOUBLE PRECISION array, dimension ( K ) - Contain the components of the deflation-adjusted updating row - vector. + Purpose + ======= - K (input) INTEGER - Contains the dimension of the non-deflated matrix, - This is the order of the related secular equation. 1 <= K <=N. + CLAUUM computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. - C (input) DOUBLE PRECISION - C contains garbage if SQRE =0 and the C-value of a Givens - rotation related to the right null space if SQRE = 1. + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. - S (input) DOUBLE PRECISION - S contains garbage if SQRE =0 and the S-value of a Givens - rotation related to the right null space if SQRE = 1. + This is the blocked form of the algorithm, calling Level 3 BLAS. - WORK (workspace) DOUBLE PRECISION array, dimension ( K ) + Arguments + ========= - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular - Further Details - =============== + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. 
- Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value ===================================================================== @@ -14612,435 +17941,402 @@ L150: */ /* Parameter adjustments */ - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - bx_dim1 = *ldbx; - bx_offset = 1 + bx_dim1; - bx -= bx_offset; - --perm; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - difr_dim1 = *ldgnum; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - poles_dim1 = *ldgnum; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - givnum_dim1 = *ldgnum; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - --difl; - --z__; - --work; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; /* Function Body */ *info = 0; - - if (*icompq < 0 || *icompq > 1) { + upper = lsame_(uplo, "U"); + if (! upper && ! 
lsame_(uplo, "L")) { *info = -1; - } else if (*nl < 1) { + } else if (*n < 0) { *info = -2; - } else if (*nr < 1) { - *info = -3; - } else if (*sqre < 0 || *sqre > 1) { + } else if (*lda < max(1,*n)) { *info = -4; } - - n = *nl + *nr + 1; - - if (*nrhs < 1) { - *info = -5; - } else if (*ldb < n) { - *info = -7; - } else if (*ldbx < n) { - *info = -9; - } else if (*givptr < 0) { - *info = -11; - } else if (*ldgcol < n) { - *info = -13; - } else if (*ldgnum < n) { - *info = -15; - } else if (*k < 1) { - *info = -20; - } if (*info != 0) { i__1 = -(*info); - xerbla_("DLALS0", &i__1); + xerbla_("CLAUUM", &i__1); return 0; } - m = n + *sqre; - nlp1 = *nl + 1; - - if (*icompq == 0) { - -/* - Apply back orthogonal transformations from the left. - - Step (1L): apply back the Givens rotations performed. -*/ - - i__1 = *givptr; - for (i__ = 1; i__ <= i__1; ++i__) { - drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & - b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + - (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); -/* L10: */ - } +/* Quick return if possible */ -/* Step (2L): permute rows of B. */ + if (*n == 0) { + return 0; + } - dcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - dcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], - ldbx); -/* L20: */ - } +/* Determine the block size for this environment. */ -/* - Step (3L): apply the inverse of the left singular vector - matrix to BX. -*/ + nb = ilaenv_(&c__1, "CLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); - if (*k == 1) { - dcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); - if (z__[1] < 0.) { - dscal_(nrhs, &c_b151, &b[b_offset], ldb); - } - } else { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - diflj = difl[j]; - dj = poles[j + poles_dim1]; - dsigj = -poles[j + (poles_dim1 << 1)]; - if (j < *k) { - difrj = -difr[j + difr_dim1]; - dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; - } - if (z__[j] == 0. 
|| poles[j + (poles_dim1 << 1)] == 0.) { - work[j] = 0.; - } else { - work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj / - (poles[j + (poles_dim1 << 1)] + dj); - } - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == - 0.) { - work[i__] = 0.; - } else { - work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] - / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & - dsigj) - diflj) / (poles[i__ + (poles_dim1 << - 1)] + dj); - } -/* L30: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == - 0.) { - work[i__] = 0.; - } else { - work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] - / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & - dsigjp) + difrj) / (poles[i__ + (poles_dim1 << - 1)] + dj); - } -/* L40: */ - } - work[1] = -1.; - temp = dnrm2_(k, &work[1], &c__1); - dgemv_("T", k, nrhs, &c_b15, &bx[bx_offset], ldbx, &work[1], & - c__1, &c_b29, &b[j + b_dim1], ldb); - dlascl_("G", &c__0, &c__0, &temp, &c_b15, &c__1, nrhs, &b[j + - b_dim1], ldb, info); -/* L50: */ - } - } + if (nb <= 1 || nb >= *n) { -/* Move the deflated rows of BX to B also. */ +/* Use unblocked code */ - if (*k < max(m,n)) { - i__1 = n - *k; - dlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 - + b_dim1], ldb); - } + clauu2_(uplo, n, &a[a_offset], lda, info); } else { -/* - Apply back the right orthogonal transformations. +/* Use blocked code */ - Step (1R): apply back the new right singular vector matrix - to B. -*/ + if (upper) { - if (*k == 1) { - dcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); - } else { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dsigj = poles[j + (poles_dim1 << 1)]; - if (z__[j] == 0.) { - work[j] = 0.; - } else { - work[j] = -z__[j] / difl[j] / (dsigj + poles[j + - poles_dim1]) / difr[j + (difr_dim1 << 1)]; - } - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - if (z__[j] == 0.) 
{ - work[i__] = 0.; - } else { - d__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; - work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[ - i__ + difr_dim1]) / (dsigj + poles[i__ + - poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; - } -/* L60: */ - } - i__2 = *k; - for (i__ = j + 1; i__ <= i__2; ++i__) { - if (z__[j] == 0.) { - work[i__] = 0.; - } else { - d__1 = -poles[i__ + (poles_dim1 << 1)]; - work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[ - i__]) / (dsigj + poles[i__ + poles_dim1]) / - difr[i__ + (difr_dim1 << 1)]; - } -/* L70: */ +/* Compute the product U * U'. */ + + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + ctrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", & + i__3, &ib, &c_b56, &a[i__ + i__ * a_dim1], lda, &a[ + i__ * a_dim1 + 1], lda); + clauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + cgemm_("No transpose", "Conjugate transpose", &i__3, &ib, + &i__4, &c_b56, &a[(i__ + ib) * a_dim1 + 1], lda, & + a[i__ + (i__ + ib) * a_dim1], lda, &c_b56, &a[i__ + * a_dim1 + 1], lda); + i__3 = *n - i__ - ib + 1; + cherk_("Upper", "No transpose", &ib, &i__3, &c_b871, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b871, &a[i__ + + i__ * a_dim1], lda); } - dgemv_("T", k, nrhs, &c_b15, &b[b_offset], ldb, &work[1], & - c__1, &c_b29, &bx[j + bx_dim1], ldbx); -/* L80: */ +/* L10: */ } - } - -/* - Step (2R): if SQRE = 1, apply back the rotation that is - related to the right null space of the subproblem. -*/ - - if (*sqre == 1) { - dcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); - drot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, - s); - } - if (*k < max(m,n)) { - i__1 = n - *k; - dlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + - bx_dim1], ldbx); - } - -/* Step (3R): permute rows of B. 
*/ - - dcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); - if (*sqre == 1) { - dcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); - } - i__1 = n; - for (i__ = 2; i__ <= i__1; ++i__) { - dcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], - ldb); -/* L90: */ - } + } else { -/* Step (4R): apply back the Givens rotations performed. */ +/* Compute the product L' * L. */ - for (i__ = *givptr; i__ >= 1; --i__) { - d__1 = -givnum[i__ + givnum_dim1]; - drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & - b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + - (givnum_dim1 << 1)], &d__1); -/* L100: */ + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + ctrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", & + ib, &i__3, &c_b56, &a[i__ + i__ * a_dim1], lda, &a[ + i__ + a_dim1], lda); + clauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + cgemm_("Conjugate transpose", "No transpose", &ib, &i__3, + &i__4, &c_b56, &a[i__ + ib + i__ * a_dim1], lda, & + a[i__ + ib + a_dim1], lda, &c_b56, &a[i__ + + a_dim1], lda); + i__3 = *n - i__ - ib + 1; + cherk_("Lower", "Conjugate transpose", &ib, &i__3, & + c_b871, &a[i__ + ib + i__ * a_dim1], lda, &c_b871, + &a[i__ + i__ * a_dim1], lda); + } +/* L20: */ + } } } return 0; -/* End of DLALS0 */ +/* End of CLAUUM */ -} /* dlals0_ */ +} /* clauum_ */ -/* Subroutine */ int dlalsa_(integer *icompq, integer *smlsiz, integer *n, - integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * - ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, - doublereal *difl, doublereal *difr, doublereal *z__, doublereal * - poles, integer *givptr, integer *givcol, integer *ldgcol, integer * - perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * - work, 
integer *iwork, integer *info) +/* Subroutine */ int cpotf2_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) { /* System generated locals */ - integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, - b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, - difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, - u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, - i__2; + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + complex q__1, q__2; /* Builtin functions */ - integer pow_ii(integer *, integer *); + double sqrt(doublereal); /* Local variables */ - static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, - ndb1, nlp1, lvl2, nrp1, nlvl, sqre; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); - static integer inode, ndiml, ndimr; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dlals0_(integer *, integer *, integer *, - integer *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, doublereal *, doublereal *, doublereal *, - integer *), dlasdt_(integer *, integer *, integer *, integer *, - integer *, integer *, integer *), xerbla_(char *, integer *); + static integer j; + static real ajj; + extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer + *, complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + static logical upper; + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), + csscal_(integer *, real *, complex *, integer *), 
xerbla_(char *, + integer *); /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + September 30, 1994 Purpose ======= - DLALSA is an itermediate step in solving the least squares problem - by computing the SVD of the coefficient matrix in compact form (The - singular vectors are computed as products of simple orthorgonal - matrices.). + CPOTF2 computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. - If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector - matrix of an upper bidiagonal matrix to the right hand side; and if - ICOMPQ = 1, DLALSA applies the right singular vector matrix to the - right hand side. The singular vector matrices were generated in - compact form by DLALSA. + The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the unblocked version of the algorithm, calling Level 2 BLAS. Arguments ========= + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored. + = 'U': Upper triangular + = 'L': Lower triangular - ICOMPQ (input) INTEGER - Specifies whether the left or the right singular vector - matrix is involved. - = 0: Left singular vector matrix - = 1: Right singular vector matrix - - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. + N (input) INTEGER + The order of the matrix A. N >= 0. - N (input) INTEGER - The row and column dimensions of the upper bidiagonal matrix. + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. 
If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. - NRHS (input) INTEGER - The number of columns of B and BX. NRHS must be at least 1. + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. - B (input) DOUBLE PRECISION array, dimension ( LDB, NRHS ) - On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - LDB (input) INTEGER - The leading dimension of B in the calling subprogram. - LDB must be at least max(1,MAX( M, N ) ). + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. - BX (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) - On exit, the result of applying the left or right singular - vector matrix to B. + ===================================================================== - LDBX (input) INTEGER - The leading dimension of BX. - U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). - On entry, U contains the left singular vector matrices of all - subproblems at the bottom level. + Test the input parameters. +*/ - LDU (input) INTEGER, LDU = > N. - The leading dimension of arrays U, VT, DIFL, DIFR, - POLES, GIVNUM, and Z. + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; - VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). - On entry, VT' contains the right singular vector matrices of - all subproblems at the bottom level. + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! 
lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CPOTF2", &i__1); + return 0; + } - K (input) INTEGER array, dimension ( N ). +/* Quick return if possible */ - DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). - where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. + if (*n == 0) { + return 0; + } - DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). - On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record - distances between singular values on the I-th level and - singular values on the (I -1)-th level, and DIFR(*, 2 * I) - record the normalizing factors of the right singular vectors - matrices of subproblems on I-th level. + if (upper) { - Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). - On entry, Z(1, I) contains the components of the deflation- - adjusted updating row vector for subproblems on the I-th - level. +/* Compute the Cholesky factorization A = U'*U. */ - POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). - On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old - singular values involved in the secular equations on the I-th - level. + i__1 = *n; + for (j = 1; j <= i__1; ++j) { - GIVPTR (input) INTEGER array, dimension ( N ). - On entry, GIVPTR( I ) records the number of Givens - rotations performed on the I-th problem on the computation - tree. +/* Compute U(J,J) and test for non-positive-definiteness. */ - GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). - On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the - locations of Givens rotations performed on the I-th level on - the computation tree. 
+ i__2 = j + j * a_dim1; + r__1 = a[i__2].r; + i__3 = j - 1; + cdotc_(&q__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1] + , &c__1); + q__1.r = r__1 - q__2.r, q__1.i = -q__2.i; + ajj = q__1.r; + if (ajj <= 0.f) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.f; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.f; - LDGCOL (input) INTEGER, LDGCOL = > N. - The leading dimension of arrays GIVCOL and PERM. +/* Compute elements J+1:N of row J. */ - PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). - On entry, PERM(*, I) records permutations done on the I-th - level of the computation tree. + if (j < *n) { + i__2 = j - 1; + clacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + i__3 = *n - j; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("Transpose", &i__2, &i__3, &q__1, &a[(j + 1) * a_dim1 + + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b56, &a[j + ( + j + 1) * a_dim1], lda); + i__2 = j - 1; + clacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = *n - j; + r__1 = 1.f / ajj; + csscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { - GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). - On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- - values of Givens rotations performed on the I-th level on the - computation tree. +/* Compute the Cholesky factorization A = L*L'. */ - C (input) DOUBLE PRECISION array, dimension ( N ). - On entry, if the I-th subproblem is not square, - C( I ) contains the C-value of a Givens rotation related to - the right null space of the I-th subproblem. + i__1 = *n; + for (j = 1; j <= i__1; ++j) { - S (input) DOUBLE PRECISION array, dimension ( N ). - On entry, if the I-th subproblem is not square, - S( I ) contains the S-value of a Givens rotation related to - the right null space of the I-th subproblem. +/* Compute L(J,J) and test for non-positive-definiteness. */ - WORK (workspace) DOUBLE PRECISION array. 
- The dimension must be at least N. + i__2 = j + j * a_dim1; + r__1 = a[i__2].r; + i__3 = j - 1; + cdotc_(&q__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); + q__1.r = r__1 - q__2.r, q__1.i = -q__2.i; + ajj = q__1.r; + if (ajj <= 0.f) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.f; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.f; - IWORK (workspace) INTEGER array. - The dimension must be at least 3 * N +/* Compute elements J+1:N of column J. */ - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. + if (j < *n) { + i__2 = j - 1; + clacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + i__3 = j - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemv_("No transpose", &i__2, &i__3, &q__1, &a[j + 1 + a_dim1] + , lda, &a[j + a_dim1], lda, &c_b56, &a[j + 1 + j * + a_dim1], &c__1); + i__2 = j - 1; + clacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + r__1 = 1.f / ajj; + csscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; - Further Details - =============== +L30: + *info = j; - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA +L40: + return 0; + +/* End of CPOTF2 */ + +} /* cpotf2_ */ + +/* Subroutine */ int cpotrf_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + complex q__1; + + /* Local variables */ + static integer j, jb, nb; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *), cherk_(char *, + char *, integer *, integer *, real *, complex *, integer *, real * + , complex *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrsm_(char *, char *, 
char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + static logical upper; + extern /* Subroutine */ int cpotf2_(char *, integer *, complex *, integer + *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CPOTRF computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. + + The factorization has the form + A = U**H * U, if UPLO = 'U', or + A = L * L**H, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the block version of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**H*U or A = L*L**H. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. 
===================================================================== @@ -15049,429 +18345,196 @@ L150: */ /* Parameter adjustments */ - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - bx_dim1 = *ldbx; - bx_offset = 1 + bx_dim1; - bx -= bx_offset; - givnum_dim1 = *ldu; - givnum_offset = 1 + givnum_dim1; - givnum -= givnum_offset; - poles_dim1 = *ldu; - poles_offset = 1 + poles_dim1; - poles -= poles_offset; - z_dim1 = *ldu; - z_offset = 1 + z_dim1; - z__ -= z_offset; - difr_dim1 = *ldu; - difr_offset = 1 + difr_dim1; - difr -= difr_offset; - difl_dim1 = *ldu; - difl_offset = 1 + difl_dim1; - difl -= difl_offset; - vt_dim1 = *ldu; - vt_offset = 1 + vt_dim1; - vt -= vt_offset; - u_dim1 = *ldu; - u_offset = 1 + u_dim1; - u -= u_offset; - --k; - --givptr; - perm_dim1 = *ldgcol; - perm_offset = 1 + perm_dim1; - perm -= perm_offset; - givcol_dim1 = *ldgcol; - givcol_offset = 1 + givcol_dim1; - givcol -= givcol_offset; - --c__; - --s; - --work; - --iwork; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; /* Function Body */ *info = 0; - - if (*icompq < 0 || *icompq > 1) { + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { *info = -1; - } else if (*smlsiz < 3) { + } else if (*n < 0) { *info = -2; - } else if (*n < *smlsiz) { - *info = -3; - } else if (*nrhs < 1) { + } else if (*lda < max(1,*n)) { *info = -4; - } else if (*ldb < *n) { - *info = -6; - } else if (*ldbx < *n) { - *info = -8; - } else if (*ldu < *n) { - *info = -10; - } else if (*ldgcol < *n) { - *info = -19; } if (*info != 0) { i__1 = -(*info); - xerbla_("DLALSA", &i__1); + xerbla_("CPOTRF", &i__1); return 0; } -/* Book-keeping and setting up the computation tree. */ - - inode = 1; - ndiml = inode + *n; - ndimr = ndiml + *n; +/* Quick return if possible */ - dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], - smlsiz); + if (*n == 0) { + return 0; + } -/* - The following code applies back the left singular vector factors. 
- For applying back the right singular vector factors, go to 50. -*/ +/* Determine the block size for this environment. */ - if (*icompq == 1) { - goto L50; - } + nb = ilaenv_(&c__1, "CPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { -/* - The nodes on the bottom level of the tree were solved - by DLASDQ. The corresponding left and right singular vector - matrices are in explicit form. First apply back the left - singular vector matrices. -*/ +/* Use unblocked code. */ - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { + cpotf2_(uplo, n, &a[a_offset], lda, info); + } else { -/* - IC : center row of each node - NL : number of rows of left subproblem - NR : number of rows of right subproblem - NLF: starting row of the left subproblem - NRF: starting row of the right subproblem -*/ +/* Use blocked code. */ - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nr = iwork[ndimr + i1]; - nlf = ic - nl; - nrf = ic + 1; - dgemm_("T", "N", &nl, nrhs, &nl, &c_b15, &u[nlf + u_dim1], ldu, &b[ - nlf + b_dim1], ldb, &c_b29, &bx[nlf + bx_dim1], ldbx); - dgemm_("T", "N", &nr, nrhs, &nr, &c_b15, &u[nrf + u_dim1], ldu, &b[ - nrf + b_dim1], ldb, &c_b29, &bx[nrf + bx_dim1], ldbx); -/* L10: */ - } + if (upper) { -/* - Next copy the rows of B that correspond to unchanged rows - in the bidiagonal matrix to BX. -*/ +/* Compute the Cholesky factorization A = U'*U. */ - i__1 = nd; - for (i__ = 1; i__ <= i__1; ++i__) { - ic = iwork[inode + i__ - 1]; - dcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); -/* L20: */ - } + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* - Finally go through the left singular vector matrices of all - the other subproblems bottom-up on the tree. -*/ + Update and factorize the current diagonal block and test + for non-positive-definiteness. 
- j = pow_ii(&c__2, &nlvl); - sqre = 0; + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + cherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1150, & + a[j * a_dim1 + 1], lda, &c_b871, &a[j + j * a_dim1], + lda); + cpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { - for (lvl = nlvl; lvl >= 1; --lvl) { - lvl2 = (lvl << 1) - 1; +/* Compute the current block row. */ -/* - find the first node LF and last node LL on - the current level LVL -*/ + i__3 = *n - j - jb + 1; + i__4 = j - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("Conjugate transpose", "No transpose", &jb, &i__3, + &i__4, &q__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) + * a_dim1 + 1], lda, &c_b56, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", + &jb, &i__3, &c_b56, &a[j + j * a_dim1], lda, &a[ + j + (j + jb) * a_dim1], lda); + } +/* L10: */ + } - if (lvl == 1) { - lf = 1; - ll = 1; } else { - i__1 = lvl - 1; - lf = pow_ii(&c__2, &i__1); - ll = (lf << 1) - 1; - } - i__1 = ll; - for (i__ = lf; i__ <= i__1; ++i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - --j; - dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & - b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & - givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & - givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * - poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + - lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ - j], &s[j], &work[1], info); -/* L30: */ - } -/* L40: */ - } - goto L90; -/* ICOMPQ = 1: applying back the right singular vector factors. */ +/* Compute the Cholesky factorization A = L*L'. */ -L50: + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? 
j >= i__2 : j <= i__2; j += i__1) { /* - First now go through the right singular vector matrices of all - the tree nodes top-down. -*/ - - j = 0; - i__1 = nlvl; - for (lvl = 1; lvl <= i__1; ++lvl) { - lvl2 = (lvl << 1) - 1; + Update and factorize the current diagonal block and test + for non-positive-definiteness. -/* - Find the first node LF and last node LL on - the current level LVL. + Computing MIN */ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + cherk_("Lower", "No transpose", &jb, &i__3, &c_b1150, &a[j + + a_dim1], lda, &c_b871, &a[j + j * a_dim1], lda); + cpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { - if (lvl == 1) { - lf = 1; - ll = 1; - } else { - i__2 = lvl - 1; - lf = pow_ii(&c__2, &i__2); - ll = (lf << 1) - 1; - } - i__2 = lf; - for (i__ = ll; i__ >= i__2; --i__) { - im1 = i__ - 1; - ic = iwork[inode + im1]; - nl = iwork[ndiml + im1]; - nr = iwork[ndimr + im1]; - nlf = ic - nl; - nrf = ic + 1; - if (i__ == ll) { - sqre = 0; - } else { - sqre = 1; - } - ++j; - dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ - nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & - givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & - givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * - poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + - lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ - j], &s[j], &work[1], info); -/* L60: */ - } -/* L70: */ - } - -/* - The nodes on the bottom level of the tree were solved - by DLASDQ. The corresponding right singular vector - matrices are in explicit form. Apply them back. -*/ +/* Compute the current block column. 
*/ - ndb1 = (nd + 1) / 2; - i__1 = nd; - for (i__ = ndb1; i__ <= i__1; ++i__) { - i1 = i__ - 1; - ic = iwork[inode + i1]; - nl = iwork[ndiml + i1]; - nr = iwork[ndimr + i1]; - nlp1 = nl + 1; - if (i__ == nd) { - nrp1 = nr; - } else { - nrp1 = nr + 1; + i__3 = *n - j - jb + 1; + i__4 = j - 1; + q__1.r = -1.f, q__1.i = -0.f; + cgemm_("No transpose", "Conjugate transpose", &i__3, &jb, + &i__4, &q__1, &a[j + jb + a_dim1], lda, &a[j + + a_dim1], lda, &c_b56, &a[j + jb + j * a_dim1], + lda); + i__3 = *n - j - jb + 1; + ctrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" + , &i__3, &jb, &c_b56, &a[j + j * a_dim1], lda, &a[ + j + jb + j * a_dim1], lda); + } +/* L20: */ + } } - nlf = ic - nl; - nrf = ic + 1; - dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b15, &vt[nlf + vt_dim1], ldu, - &b[nlf + b_dim1], ldb, &c_b29, &bx[nlf + bx_dim1], ldbx); - dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b15, &vt[nrf + vt_dim1], ldu, - &b[nrf + b_dim1], ldb, &c_b29, &bx[nrf + bx_dim1], ldbx); -/* L80: */ } + goto L40; -L90: +L30: + *info = *info + j - 1; +L40: return 0; -/* End of DLALSA */ +/* End of CPOTRF */ -} /* dlalsa_ */ +} /* cpotrf_ */ -/* Subroutine */ int dlalsd_(char *uplo, integer *smlsiz, integer *n, integer - *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, - doublereal *rcond, integer *rank, doublereal *work, integer *iwork, - integer *info) +/* Subroutine */ int cpotri_(char *uplo, integer *n, complex *a, integer *lda, + integer *info) { /* System generated locals */ - integer b_dim1, b_offset, i__1, i__2; - doublereal d__1; - - /* Builtin functions */ - double log(doublereal), d_sign(doublereal *, doublereal *); + integer a_dim1, a_offset, i__1; /* Local variables */ - static integer c__, i__, j, k; - static doublereal r__; - static integer s, u, z__; - static doublereal cs; - static integer bx; - static doublereal sn; - static integer st, vt, nm1, st1; - static doublereal eps; - static integer iwk; - static doublereal tol; - static integer difl, difr, 
perm, nsub; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer nlvl, sqre, bxst; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *), - dcopy_(integer *, doublereal *, integer *, doublereal *, integer - *); - static integer poles, sizei, nsize, nwork, icmpq1, icmpq2; - - extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *), dlalsa_(integer *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - integer *, integer *), dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *); - extern integer idamax_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer - *, integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *), dlaset_(char *, integer *, integer *, - doublereal *, doublereal *, doublereal *, integer *), - xerbla_(char *, integer *); - static integer givcol; - extern doublereal dlanst_(char *, integer *, 
doublereal *, doublereal *); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); - static doublereal orgnrm; - static integer givnum, givptr, smlszp; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), clauum_( + char *, integer *, complex *, integer *, integer *), + ctrtri_(char *, char *, integer *, complex *, integer *, integer * + ); /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + March 31, 1993 Purpose ======= - DLALSD uses the singular value decomposition of A to solve the least - squares problem of finding X to minimize the Euclidean norm of each - column of A*X-B, where A is N-by-N upper bidiagonal, and X and B - are N-by-NRHS. The solution X overwrites B. - - The singular values of A smaller than RCOND times the largest - singular value are treated as zero in solving the least squares - problem; in this case a minimum norm solution is returned. - The actual singular values are returned in D in ascending order. - - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. + CPOTRI computes the inverse of a complex Hermitian positive definite + matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + computed by CPOTRF. Arguments ========= - UPLO (input) CHARACTER*1 - = 'U': D and E define an upper bidiagonal matrix. - = 'L': D and E define a lower bidiagonal matrix. - - SMLSIZ (input) INTEGER - The maximum size of the subproblems at the bottom of the - computation tree. - - N (input) INTEGER - The dimension of the bidiagonal matrix. N >= 0. 
- - NRHS (input) INTEGER - The number of columns of B. NRHS must be at least 1. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry D contains the main diagonal of the bidiagonal - matrix. On exit, if INFO = 0, D contains its singular values. - - E (input) DOUBLE PRECISION array, dimension (N-1) - Contains the super-diagonal entries of the bidiagonal matrix. - On exit, E has been destroyed. - - B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) - On input, B contains the right hand sides of the least - squares problem. On output, B contains the solution X. - - LDB (input) INTEGER - The leading dimension of B in the calling subprogram. - LDB must be at least max(1,N). - - RCOND (input) DOUBLE PRECISION - The singular values of A less than or equal to RCOND times - the largest singular value are treated as zero in solving - the least squares problem. If RCOND is negative, - machine precision is used instead. - For example, if diag(S)*X=B were the least squares problem, - where diag(S) is a diagonal matrix of singular values, the - solution would be X(i) = B(i) / S(i) if S(i) is greater than - RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to - RCOND*max(S). - - RANK (output) INTEGER - The number of singular values of A greater than RCOND times - the largest singular value. - - WORK (workspace) DOUBLE PRECISION array, dimension at least - (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), - where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. - IWORK (workspace) INTEGER array, dimension at least - (3*N*NLVL + 11*N) + N (input) INTEGER + The order of the matrix A. N >= 0. - INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. 
- > 0: The algorithm failed to compute an singular value while - working on the submatrix lying in rows and columns - INFO/(N+1) through MOD(INFO,N+1). + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular factor U or L from the Cholesky + factorization A = U**H*U or A = L*L**H, as computed by + CPOTRF. + On exit, the upper or lower triangle of the (Hermitian) + inverse of A, overwriting the input factor U or L. - Further Details - =============== + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - Based on contributions by - Ming Gu and Ren-Cang Li, Computer Science Division, University of - California at Berkeley, USA - Osni Marques, LBNL/NERSC, USA + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the (i,i) element of the factor U or L is + zero, and the inverse could not be computed. ===================================================================== @@ -15480,1161 +18543,1382 @@ L90: */ /* Parameter adjustments */ - --d__; - --e; - b_dim1 = *ldb; - b_offset = 1 + b_dim1; - b -= b_offset; - --work; - --iwork; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; /* Function Body */ *info = 0; - - if (*n < 0) { - *info = -3; - } else if (*nrhs < 1) { + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { *info = -4; - } else if (*ldb < 1 || *ldb < *n) { - *info = -8; } if (*info != 0) { i__1 = -(*info); - xerbla_("DLALSD", &i__1); + xerbla_("CPOTRI", &i__1); return 0; } - eps = EPSILON; - -/* Set up the tolerance. */ - - if (*rcond <= 0. || *rcond >= 1.) { - *rcond = eps; - } - - *rank = 0; - -/* Quick return if possible. */ +/* Quick return if possible */ if (*n == 0) { return 0; - } else if (*n == 1) { - if (d__[1] == 0.) 
{ - dlaset_("A", &c__1, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); - } else { - *rank = 1; - dlascl_("G", &c__0, &c__0, &d__[1], &c_b15, &c__1, nrhs, &b[ - b_offset], ldb, info); - d__[1] = abs(d__[1]); - } - return 0; } -/* Rotate the matrix if it is lower bidiagonal. */ +/* Invert the triangular Cholesky factor U or L. */ - if (*(unsigned char *)uplo == 'L') { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); - d__[i__] = r__; - e[i__] = sn * d__[i__ + 1]; - d__[i__ + 1] = cs * d__[i__ + 1]; - if (*nrhs == 1) { - drot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & - c__1, &cs, &sn); - } else { - work[(i__ << 1) - 1] = cs; - work[i__ * 2] = sn; - } -/* L10: */ - } - if (*nrhs > 1) { - i__1 = *nrhs; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = *n - 1; - for (j = 1; j <= i__2; ++j) { - cs = work[(j << 1) - 1]; - sn = work[j * 2]; - drot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ * - b_dim1], &c__1, &cs, &sn); -/* L20: */ - } -/* L30: */ - } - } + ctrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; } -/* Scale. */ +/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ - nm1 = *n - 1; - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { - dlaset_("A", n, nrhs, &c_b29, &c_b29, &b[b_offset], ldb); - return 0; - } + clauum_(uplo, n, &a[a_offset], lda, info); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, &c__1, &d__[1], n, info); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &nm1, &c__1, &e[1], &nm1, - info); + return 0; -/* - If N is smaller than the minimum divide size SMLSIZ, then solve - the problem with another solver. 
-*/ +/* End of CPOTRI */ - if (*n <= *smlsiz) { - nwork = *n * *n + 1; - dlaset_("A", n, n, &c_b29, &c_b15, &work[1], n); - dlasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, & - work[1], n, &b[b_offset], ldb, &work[nwork], info); - if (*info != 0) { - return 0; - } - tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if (d__[i__] <= tol) { - dlaset_("A", &c__1, nrhs, &c_b29, &c_b29, &b[i__ + b_dim1], - ldb); - } else { - dlascl_("G", &c__0, &c__0, &d__[i__], &c_b15, &c__1, nrhs, &b[ - i__ + b_dim1], ldb, info); - ++(*rank); - } -/* L40: */ - } - dgemm_("T", "N", n, nrhs, n, &c_b15, &work[1], n, &b[b_offset], ldb, & - c_b29, &work[nwork], n); - dlacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb); +} /* cpotri_ */ -/* Unscale. */ +/* Subroutine */ int cpotrs_(char *uplo, integer *n, integer *nrhs, complex * + a, integer *lda, complex *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; - dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, - info); - dlasrt_("D", n, &d__[1], info); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, nrhs, &b[b_offset], - ldb, info); + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); - return 0; - } -/* Book-keeping and setting up some constants. */ +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 - nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) / - log(2.)) + 1; - smlszp = *smlsiz + 1; + Purpose + ======= - u = 1; - vt = *smlsiz * *n + 1; - difl = vt + smlszp * *n; - difr = difl + nlvl * *n; - z__ = difr + (nlvl * *n << 1); - c__ = z__ + nlvl * *n; - s = c__ + *n; - poles = s + *n; - givnum = poles + (nlvl << 1) * *n; - bx = givnum + (nlvl << 1) * *n; - nwork = bx + *n * *nrhs; + CPOTRS solves a system of linear equations A*X = B with a Hermitian + positive definite matrix A using the Cholesky factorization + A = U**H*U or A = L*L**H computed by CPOTRF. - sizei = *n + 1; - k = sizei + *n; - givptr = k + *n; - perm = givptr + *n; - givcol = perm + nlvl * *n; - iwk = givcol + (nlvl * *n << 1); + Arguments + ========= - st = 1; - sqre = 0; - icmpq1 = 1; - icmpq2 = 0; - nsub = 0; + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = d__[i__], abs(d__1)) < eps) { - d__[i__] = d_sign(&eps, &d__[i__]); - } -/* L50: */ - } + N (input) INTEGER + The order of the matrix A. N >= 0. - i__1 = nm1; - for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { - ++nsub; - iwork[nsub] = st; + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. -/* - Subproblem found. First determine its size and then - apply divide and conquer on it. -*/ + A (input) COMPLEX array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**H*U or A = L*L**H, as computed by CPOTRF. - if (i__ < nm1) { + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). -/* A subproblem with E(I) small for I < NM1. 
*/ + B (input/output) COMPLEX array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. - nsize = i__ - st + 1; - iwork[sizei + nsub - 1] = nsize; - } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). -/* A subproblem with E(NM1) not too small but I = NM1. */ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value - nsize = *n - st + 1; - iwork[sizei + nsub - 1] = nsize; - } else { + ===================================================================== -/* - A subproblem with E(NM1) small. This implies an - 1-by-1 subproblem at D(N), which is not solved - explicitly. + + Test the input parameters. */ - nsize = i__ - st + 1; - iwork[sizei + nsub - 1] = nsize; - ++nsub; - iwork[nsub] = *n; - iwork[sizei + nsub - 1] = 1; - dcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); - } - st1 = st - 1; - if (nsize == 1) { + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; -/* - This is a 1-by-1 subproblem and is not solved - explicitly. -*/ + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CPOTRS", &i__1); + return 0; + } - dcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); - } else if (nsize <= *smlsiz) { +/* Quick return if possible */ -/* This is a small subproblem and is solved by DLASDQ. 
*/ + if (*n == 0 || *nrhs == 0) { + return 0; + } - dlaset_("A", &nsize, &nsize, &c_b29, &c_b15, &work[vt + st1], - n); - dlasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[ - st], &work[vt + st1], n, &work[nwork], n, &b[st + - b_dim1], ldb, &work[nwork], info); - if (*info != 0) { - return 0; - } - dlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + - st1], n); - } else { + if (upper) { -/* A large problem. Solve it using divide and conquer. */ +/* + Solve A*X = B where A = U'*U. - dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & - work[u + st1], n, &work[vt + st1], &iwork[k + st1], & - work[difl + st1], &work[difr + st1], &work[z__ + st1], - &work[poles + st1], &iwork[givptr + st1], &iwork[ - givcol + st1], n, &iwork[perm + st1], &work[givnum + - st1], &work[c__ + st1], &work[s + st1], &work[nwork], - &iwork[iwk], info); - if (*info != 0) { - return 0; - } - bxst = bx + st1; - dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & - work[bxst], n, &work[u + st1], n, &work[vt + st1], & - iwork[k + st1], &work[difl + st1], &work[difr + st1], - &work[z__ + st1], &work[poles + st1], &iwork[givptr + - st1], &iwork[givcol + st1], n, &iwork[perm + st1], & - work[givnum + st1], &work[c__ + st1], &work[s + st1], - &work[nwork], &iwork[iwk], info); - if (*info != 0) { - return 0; - } - } - st = i__ + 1; - } -/* L60: */ - } + Solve U'*X = B, overwriting B with X. +*/ -/* Apply the singular values and treat the tiny ones as zero. */ + ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, & + c_b56, &a[a_offset], lda, &b[b_offset], ldb); - tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); +/* Solve U*X = B, overwriting B with X. */ - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { + ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b56, & + a[a_offset], lda, &b[b_offset], ldb); + } else { /* - Some of the elements in D can be negative because 1-by-1 - subproblems were not solved explicitly. 
+ Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. */ - if ((d__1 = d__[i__], abs(d__1)) <= tol) { - dlaset_("A", &c__1, nrhs, &c_b29, &c_b29, &work[bx + i__ - 1], n); - } else { - ++(*rank); - dlascl_("G", &c__0, &c__0, &d__[i__], &c_b15, &c__1, nrhs, &work[ - bx + i__ - 1], n, info); - } - d__[i__] = (d__1 = d__[i__], abs(d__1)); -/* L70: */ - } + ctrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b56, & + a[a_offset], lda, &b[b_offset], ldb); -/* Now apply back the right singular vectors. */ +/* Solve L'*X = B, overwriting B with X. */ - icmpq2 = 1; - i__1 = nsub; - for (i__ = 1; i__ <= i__1; ++i__) { - st = iwork[i__]; - st1 = st - 1; - nsize = iwork[sizei + i__ - 1]; - bxst = bx + st1; - if (nsize == 1) { - dcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); - } else if (nsize <= *smlsiz) { - dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b15, &work[vt + st1], n, - &work[bxst], n, &c_b29, &b[st + b_dim1], ldb); - } else { - dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + - b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[ - k + st1], &work[difl + st1], &work[difr + st1], &work[z__ - + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ - givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], - &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[ - iwk], info); - if (*info != 0) { - return 0; - } - } -/* L80: */ + ctrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, & + c_b56, &a[a_offset], lda, &b[b_offset], ldb); } -/* Unscale and sort the singular values. 
*/ - - dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, n, &c__1, &d__[1], n, info); - dlasrt_("D", n, &d__[1], info); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, n, nrhs, &b[b_offset], ldb, - info); - return 0; -/* End of DLALSD */ +/* End of CPOTRS */ -} /* dlalsd_ */ +} /* cpotrs_ */ -/* Subroutine */ int dlamrg_(integer *n1, integer *n2, doublereal *a, integer - *dtrd1, integer *dtrd2, integer *index) +/* Subroutine */ int csrot_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy, real *c__, real *s) { /* System generated locals */ - integer i__1; + integer i__1, i__2, i__3, i__4; + complex q__1, q__2, q__3; /* Local variables */ - static integer i__, ind1, ind2, n1sv, n2sv; + static integer i__, ix, iy; + static complex ctemp; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + applies a plane rotation, where the cos and sin (c and s) are real + and the vectors cx and cy are complex. + jack dongarra, linpack, 3/11/78. + ===================================================================== +*/ - Purpose - ======= - DLAMRG will create a permutation list which will merge the elements - of A (which is composed of two independently sorted sets) into a - single set which is sorted in ascending order. + /* Parameter adjustments */ + --cy; + --cx; - Arguments - ========= + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } - N1 (input) INTEGER - N2 (input) INTEGER - These arguements contain the respective lengths of the two - sorted lists to be merged. +/* + code for unequal increments or equal increments not equal + to 1 +*/ - A (input) DOUBLE PRECISION array, dimension (N1+N2) - The first N1 elements of A contain a list of numbers which - are sorted in either ascending or descending order. Likewise - for the final N2 elements. 
- - DTRD1 (input) INTEGER - DTRD2 (input) INTEGER - These are the strides to be taken through the array A. - Allowable strides are 1 and -1. They indicate whether a - subset of A is sorted in ascending (DTRDx = 1) or descending - (DTRDx = -1) order. - - INDEX (output) INTEGER array, dimension (N1+N2) - On exit this array will contain a permutation such that - if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be - sorted in ascending order. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --index; - --a; - - /* Function Body */ - n1sv = *n1; - n2sv = *n2; - if (*dtrd1 > 0) { - ind1 = 1; - } else { - ind1 = *n1; - } - if (*dtrd2 > 0) { - ind2 = *n1 + 1; - } else { - ind2 = *n1 + *n2; + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; } - i__ = 1; -/* while ( (N1SV > 0) & (N2SV > 0) ) */ -L10: - if (n1sv > 0 && n2sv > 0) { - if (a[ind1] <= a[ind2]) { - index[i__] = ind1; - ++i__; - ind1 += *dtrd1; - --n1sv; - } else { - index[i__] = ind2; - ++i__; - ind2 += *dtrd2; - --n2sv; - } - goto L10; + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; } -/* end while */ - if (n1sv == 0) { - i__1 = n2sv; - for (n1sv = 1; n1sv <= i__1; ++n1sv) { - index[i__] = ind2; - ++i__; - ind2 += *dtrd2; -/* L20: */ - } - } else { -/* N2SV .EQ. 
0 */ - i__1 = n1sv; - for (n2sv = 1; n2sv <= i__1; ++n2sv) { - index[i__] = ind1; - ++i__; - ind1 += *dtrd1; -/* L30: */ - } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; + i__3 = iy; + q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; + i__2 = iy; + i__3 = iy; + q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; + i__4 = ix; + q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + i__2 = ix; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; + ix += *incx; + iy += *incy; +/* L10: */ } - return 0; -/* End of DLAMRG */ +/* code for both increments equal to 1 */ -} /* dlamrg_ */ +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; + i__3 = i__; + q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; + i__2 = i__; + i__3 = i__; + q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; + i__4 = i__; + q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + i__2 = i__; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; +/* L30: */ + } + return 0; +} /* csrot_ */ -doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer - *lda, doublereal *work) +/* Subroutine */ int cstedc_(char *compz, integer *n, real *d__, real *e, + complex *z__, integer *ldz, complex *work, integer *lwork, real * + rwork, integer *lrwork, integer *iwork, integer *liwork, integer * + info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal ret_val, d__1, d__2, d__3; + integer z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, 
r__2; /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); double sqrt(doublereal); /* Local variables */ - static integer i__, j; - static doublereal sum, scale; + static integer i__, j, k, m; + static real p; + static integer ii, ll, end, lgn; + static real eps, tiny; extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, - doublereal *, doublereal *); + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + static integer lwmin; + extern /* Subroutine */ int claed0_(integer *, integer *, real *, real *, + complex *, integer *, complex *, integer *, real *, integer *, + integer *); + static integer start; + extern /* Subroutine */ int clacrm_(integer *, integer *, complex *, + integer *, real *, integer *, complex *, integer *, real *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), sstedc_(char *, integer *, real *, real *, real *, + integer *, real *, integer *, integer *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); + static integer liwmin, icompz; + extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *, + complex *, integer *, real *, integer *); + static real orgnrm; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + static integer lrwmin; + static logical lquery; + static integer smlsiz; + extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real 
*, + real *, integer *, real *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + June 30, 1999 Purpose ======= - DLANGE returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real matrix A. + CSTEDC computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this + matrix to tridiagonal form. - Description - =========== + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See SLAED3 for details. - DLANGE returns the value + Arguments + ========= - DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'I': Compute eigenvectors of tridiagonal matrix also. + = 'V': Compute eigenvectors of original Hermitian matrix + also. On entry, Z contains the unitary matrix used + to reduce the original matrix to tridiagonal form. - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. 
+ N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. - Arguments - ========= + D (input/output) REAL array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANGE as described - above. + E (input/output) REAL array, dimension (N-1) + On entry, the subdiagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. - M (input) INTEGER - The number of rows of the matrix A. M >= 0. When M = 0, - DLANGE is set to zero. + Z (input/output) COMPLEX array, dimension (LDZ,N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. - N (input) INTEGER - The number of columns of the matrix A. N >= 0. When N = 0, - DLANGE is set to zero. + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1. + If eigenvectors are desired, then LDZ >= max(1,N). - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The m by n matrix A. + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(M,1). + LWORK (input) INTEGER + The dimension of the array WORK. + If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. + If COMPZ = 'V' and N > 1, LWORK must be at least N*N. - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= M when NORM = 'I'; otherwise, WORK is not - referenced. 
+ If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. - ===================================================================== -*/ + RWORK (workspace/output) REAL array, + dimension (LRWORK) + On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. + + LRWORK (input) INTEGER + The dimension of the array RWORK. + If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. + If COMPZ = 'V' and N > 1, LRWORK must be at least + 1 + 3*N + 2*N*lg N + 3*N**2 , + where lg( N ) = smallest integer k such + that 2**k >= N. + If COMPZ = 'I' and N > 1, LRWORK must be at least + 1 + 4*N + 2*N**2 . + + If LRWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the RWORK array, + returns this value as the first entry of the RWORK array, and + no error message related to LRWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. + If COMPZ = 'V' or N > 1, LIWORK must be at least + 6 + 6*N + 5*N*lg N. + If COMPZ = 'I' or N > 1, LIWORK must be at least + 3 + 5*N . + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). 
+ + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + ===================================================================== + + + Test the input parameters. +*/ /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; --work; + --rwork; + --iwork; /* Function Body */ - if (min(*m,*n) == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { + *info = 0; + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; -/* Find max(abs(A(i,j))). */ + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (*n <= 1 || icompz <= 0) { + lwmin = 1; + liwmin = 1; + lrwmin = 1; + } else { + lgn = (integer) (log((real) (*n)) / log(2.f)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { + lwmin = *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { + lwmin = 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = (*n << 2) + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } else if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! 
lquery) { + *info = -12; + } - value = 0.; + if (*info == 0) { + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CSTEDC", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; + } + return 0; + } + + smlsiz = ilaenv_(&c__9, "CSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + If the following conditional clause is removed, then the routine + will use the Divide and Conquer routine to compute only the + eigenvalues, which requires (3N + 3N**2) real workspace and + (2 + 5N + 2N lg(N)) integer workspace. + Since on many architectures SSTERF is much faster than any other + algorithm for finding eigenvalues only, it is used here + as the default. + + If COMPZ = 'N', use SSTERF to compute the eigenvalues. +*/ + + if (icompz == 0) { + ssterf_(n, &d__[1], &e[1], info); + return 0; + } + +/* + If N is smaller than the minimum divide size (SMLSIZ+1), then + solve the problem with another solver. +*/ + + if (*n <= smlsiz) { + if (icompz == 0) { + ssterf_(n, &d__[1], &e[1], info); + return 0; + } else if (icompz == 2) { + csteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); + return 0; + } else { + csteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); + return 0; + } + } + +/* If COMPZ = 'I', we simply call SSTEDC instead. 
*/ + + if (icompz == 2) { + slaset_("Full", n, n, &c_b1101, &c_b871, &rwork[1], n); + ll = *n * *n + 1; + i__1 = *lrwork - ll + 1; + sstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & + iwork[1], liwork, info); i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); - value = max(d__2,d__3); + i__3 = i__ + j * z_dim1; + i__4 = (j - 1) * *n + i__; + z__[i__3].r = rwork[i__4], z__[i__3].i = 0.f; /* L10: */ } /* L20: */ } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { + return 0; + } -/* Find norm1(A). */ +/* + From now on, only option left to be handled is COMPZ = 'V', + i.e. ICOMPZ = 1. - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L30: */ + Scale. +*/ + + orgnrm = slanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.f) { + return 0; + } + + eps = slamch_("Epsilon"); + + start = 1; + +/* while ( START <= N ) */ + +L30: + if (start <= *n) { + +/* + Let END be the position of the next subdiagonal entry such that + E( END ) <= TINY or END = N if no such subdiagonal exists. The + matrix identified by the elements between START and END + constitutes an independent sub-problem. +*/ + + end = start; +L40: + if (end < *n) { + tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 = + d__[end + 1], dabs(r__2))); + if ((r__1 = e[end], dabs(r__1)) > tiny) { + ++end; + goto L40; } - value = max(value,sum); -/* L40: */ } - } else if (lsame_(norm, "I")) { -/* Find normI(A). */ +/* (Sub) Problem determined. Compute its size and solve it. 
*/ - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L60: */ + m = end - start + 1; + if (m > smlsiz) { + *info = smlsiz; + +/* Scale. */ + + orgnrm = slanst_("M", &m, &d__[start], &e[start]); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &i__1, &c__1, &e[ + start], &i__2, info); + + claed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1], + ldz, &work[1], n, &rwork[1], &iwork[1], info); + if (*info > 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m + + 1) + start - 1; + return 0; + } + +/* Scale back. */ + + slascl_("G", &c__0, &c__0, &c_b871, &orgnrm, &m, &c__1, &d__[ + start], &m, info); + + } else { + ssteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m * + m + 1], info); + clacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & + work[1], n, &rwork[m * m + 1]); + clacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz); + if (*info > 0) { + *info = start * (*n + 1) + end; + return 0; } -/* L70: */ - } - value = 0.; - i__1 = *m; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L80: */ } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { -/* Find normF(A). */ + start = end + 1; + goto L30; + } + +/* + endwhile + + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. 
+*/ + + if (m != *n) { + +/* Use Selection Sort to minimize swaps of eigenvectors */ - scale = 0.; - sum = 1.; i__1 = *n; - for (j = 1; j <= i__1; ++j) { - dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L50: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L60: */ } - value = scale * sqrt(sum); } - ret_val = value; - return ret_val; + work[1].r = (real) lwmin, work[1].i = 0.f; + rwork[1] = (real) lrwmin; + iwork[1] = liwmin; -/* End of DLANGE */ + return 0; -} /* dlange_ */ +/* End of CSTEDC */ -doublereal dlanhs_(char *norm, integer *n, doublereal *a, integer *lda, - doublereal *work) +} /* cstedc_ */ + +/* Subroutine */ int csteqr_(char *compz, integer *n, real *d__, real *e, + complex *z__, integer *ldz, real *work, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublereal ret_val, d__1, d__2, d__3; + integer z_dim1, z_offset, i__1, i__2; + real r__1, r__2; /* Builtin functions */ - double sqrt(doublereal); + double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ - static integer i__, j; - static doublereal sum, scale; + static real b, c__, f, g; + static integer i__, j, k, l, m; + static real p, r__, s; + static integer l1, ii, mm, lm1, mm1, nm1; + static real rt1, rt2, eps; + static integer lsv; + static real tst, eps2; + static integer lend, jtot; + extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) + ; extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, - doublereal *, doublereal *); + extern /* Subroutine */ int clasr_(char *, char *, char *, integer *, + integer *, real *, real *, complex *, 
integer *); + static real anorm; + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); + static integer lendm1, lendp1; + extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real * + , real *, real *); + extern doublereal slapy2_(real *, real *); + static integer iscale; + extern doublereal slamch_(char *); + extern /* Subroutine */ int claset_(char *, integer *, integer *, complex + *, complex *, complex *, integer *); + static real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static real safmax; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + static integer lendsv; + extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * + ); + static real ssfmin; + static integer nmaxit, icompz; + static real ssfmax; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + September 30, 1994 Purpose ======= - DLANHS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - Hessenberg matrix A. + CSTEQR computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the implicit QL or QR method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this + matrix to tridiagonal form. - Description - =========== + Arguments + ========= - DLANHS returns the value + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. 
+ = 'V': Compute eigenvalues and eigenvectors of the original + Hermitian matrix. On entry, Z must contain the + unitary matrix used to reduce the original matrix + to tridiagonal form. + = 'I': Compute eigenvalues and eigenvectors of the + tridiagonal matrix. Z is initialized to the identity + matrix. - DLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' + N (input) INTEGER + The order of the matrix. N >= 0. - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. + D (input/output) REAL array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. - Arguments - ========= + E (input/output) REAL array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANHS as described - above. + Z (input/output) COMPLEX array, dimension (LDZ, N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, DLANHS is - set to zero. + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1, and if + eigenvectors are desired, then LDZ >= max(1,N). 
- A (input) DOUBLE PRECISION array, dimension (LDA,N) - The n by n upper Hessenberg matrix A; the part of A below the - first sub-diagonal is not referenced. + WORK (workspace) REAL array, dimension (max(1,2*N-2)) + If COMPZ = 'N', then WORK is not referenced. - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm has failed to find all the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero; on exit, D + and E contain the elements of a symmetric tridiagonal + matrix which is unitarily similar to the original + matrix. - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= N when NORM = 'I'; otherwise, WORK is not - referenced. + ===================================================================== - ===================================================================== -*/ + Test the input parameters. +*/ /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; --work; /* Function Body */ - if (*n == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { + *info = 0; -/* Find max(abs(A(i,j))). 
*/ + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CSTEQR", &i__1); + return 0; + } - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); - value = max(d__2,d__3); -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { +/* Quick return if possible */ -/* Find norm1(A). */ + if (*n == 0) { + return 0; + } - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L30: */ - } - value = max(value,sum); -/* L40: */ + if (*n == 1) { + if (icompz == 2) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1.f, z__[i__1].i = 0.f; } - } else if (lsame_(norm, "I")) { + return 0; + } -/* Find normI(A). */ +/* Determine the unit roundoff and over/underflow thresholds. */ - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L60: */ + eps = slamch_("E"); +/* Computing 2nd power */ + r__1 = eps; + eps2 = r__1 * r__1; + safmin = slamch_("S"); + safmax = 1.f / safmin; + ssfmax = sqrt(safmax) / 3.f; + ssfmin = sqrt(safmin) / eps2; + +/* + Compute the eigenvalues and eigenvectors of the tridiagonal + matrix. 
+*/ + + if (icompz == 2) { + claset_("Full", n, n, &c_b55, &c_b56, &z__[z_offset], ldz); + } + + nmaxit = *n * 30; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.f; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (r__1 = e[m], dabs(r__1)); + if (tst == 0.f) { + goto L30; } -/* L70: */ - } - value = 0.; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L80: */ + if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m + + 1], dabs(r__2))) * eps) { + e[m] = 0.f; + goto L30; + } +/* L20: */ } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + } + m = *n; -/* Find normF(A). */ +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } - scale = 0.; - sum = 1.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = slanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm == 0.f) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); } - ret_val = value; - return ret_val; +/* 
Choose between QL and QR iteration */ -/* End of DLANHS */ + if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { + lend = lsv; + l = lendsv; + } -} /* dlanhs_ */ + if (lend > l) { -doublereal dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e) -{ - /* System generated locals */ - integer i__1; - doublereal ret_val, d__1, d__2, d__3, d__4, d__5; +/* + QL Iteration - /* Builtin functions */ - double sqrt(doublereal); + Look for small subdiagonal element. +*/ - /* Local variables */ - static integer i__; - static doublereal sum, scale; - extern logical lsame_(char *, char *); - static doublereal anorm; - extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, - doublereal *, doublereal *); +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + r__2 = (r__1 = e[m], dabs(r__1)); + tst = r__2 * r__2; + if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m + + 1], dabs(r__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + m = lend; + +L60: + if (m < lend) { + e[m] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L80; + } /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 + to compute its eigensystem. 
+*/ + if (m == l + 1) { + if (icompz > 0) { + slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + clasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz); + } else { + slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.f; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } - Purpose - ======= + if (jtot == nmaxit) { + goto L140; + } + ++jtot; - DLANST returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real symmetric tridiagonal matrix A. +/* Form shift. */ - Description - =========== + g = (d__[l + 1] - p) / (e[l] * 2.f); + r__ = slapy2_(&g, &c_b871); + g = d__[m] - p + e[l] / (g + r_sign(&r__, &g)); - DLANST returns the value + s = 1.f; + c__ = 1.f; + p = 0.f; - DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' +/* Inner loop */ - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + slartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2.f * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; - Arguments - ========= +/* If eigenvectors are desired, then save rotations. */ - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANST as described - above. + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } - N (input) INTEGER - The order of the matrix A. N >= 0. 
When N = 0, DLANST is - set to zero. +/* L70: */ + } - D (input) DOUBLE PRECISION array, dimension (N) - The diagonal elements of A. +/* If eigenvectors are desired, then apply saved rotations. */ - E (input) DOUBLE PRECISION array, dimension (N-1) - The (n-1) sub-diagonal or super-diagonal elements of A. + if (icompz > 0) { + mm = m - l + 1; + clasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz); + } - ===================================================================== -*/ + d__[l] -= p; + e[l] = g; + goto L40; +/* Eigenvalue found. */ - /* Parameter adjustments */ - --e; - --d__; +L80: + d__[l] = p; - /* Function Body */ - if (*n <= 0) { - anorm = 0.; - } else if (lsame_(norm, "M")) { + ++l; + if (l <= lend) { + goto L40; + } + goto L140; -/* Find max(abs(A(i,j))). */ + } else { - anorm = (d__1 = d__[*n], abs(d__1)); - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__2 = anorm, d__3 = (d__1 = d__[i__], abs(d__1)); - anorm = max(d__2,d__3); -/* Computing MAX */ - d__2 = anorm, d__3 = (d__1 = e[i__], abs(d__1)); - anorm = max(d__2,d__3); -/* L10: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1' || lsame_(norm, "I")) { +/* + QR Iteration -/* Find norm1(A). */ + Look for small superdiagonal element. 
+*/ - if (*n == 1) { - anorm = abs(d__[1]); - } else { -/* Computing MAX */ - d__3 = abs(d__[1]) + abs(e[1]), d__4 = (d__1 = e[*n - 1], abs( - d__1)) + (d__2 = d__[*n], abs(d__2)); - anorm = max(d__3,d__4); - i__1 = *n - 1; - for (i__ = 2; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[ - i__], abs(d__2)) + (d__3 = e[i__ - 1], abs(d__3)); - anorm = max(d__4,d__5); -/* L20: */ +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + r__2 = (r__1 = e[m - 1], dabs(r__1)); + tst = r__2 * r__2; + if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m + - 1], dabs(r__2)) + safmin) { + goto L110; + } +/* L100: */ } } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { -/* Find normF(A). */ + m = lend; - scale = 0.; - sum = 1.; - if (*n > 1) { - i__1 = *n - 1; - dlassq_(&i__1, &e[1], &c__1, &scale, &sum); - sum *= 2; +L110: + if (m > lend) { + e[m - 1] = 0.f; + } + p = d__[l]; + if (m == l) { + goto L130; } - dlassq_(n, &d__[1], &c__1, &scale, &sum); - anorm = scale * sqrt(sum); - } - ret_val = anorm; - return ret_val; +/* + If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 + to compute its eigensystem. 
+*/ -/* End of DLANST */ + if (m == l - 1) { + if (icompz > 0) { + slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + clasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz); + } else { + slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.f; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } -} /* dlanst_ */ + if (jtot == nmaxit) { + goto L140; + } + ++jtot; -doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer - *lda, doublereal *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2; - doublereal ret_val, d__1, d__2, d__3; +/* Form shift. */ - /* Builtin functions */ - double sqrt(doublereal); + g = (d__[l - 1] - p) / (e[l - 1] * 2.f); + r__ = slapy2_(&g, &c_b871); + g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g)); - /* Local variables */ - static integer i__, j; - static doublereal sum, absa, scale; - extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, - doublereal *, doublereal *); + s = 1.f; + c__ = 1.f; + p = 0.f; +/* Inner loop */ -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + slartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; +/* If eigenvectors are desired, then save rotations. 
*/ - Purpose - ======= + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } - DLANSY returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - real symmetric matrix A. +/* L120: */ + } - Description - =========== +/* If eigenvectors are desired, then apply saved rotations. */ - DLANSY returns the value - - DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. + if (icompz > 0) { + mm = l - m + 1; + clasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz); + } - Arguments - ========= + d__[l] -= p; + e[lm1] = g; + goto L90; - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANSY as described - above. +/* Eigenvalue found. */ - UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - symmetric matrix A is to be referenced. - = 'U': Upper triangular part of A is referenced - = 'L': Lower triangular part of A is referenced +L130: + d__[l] = p; - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, DLANSY is - set to zero. + --l; + if (l >= lend) { + goto L90; + } + goto L140; - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The symmetric matrix A. If UPLO = 'U', the leading n by n - upper triangular part of A contains the upper triangular part - of the matrix A, and the strictly lower triangular part of A - is not referenced. If UPLO = 'L', the leading n by n lower - triangular part of A contains the lower triangular part of - the matrix A, and the strictly upper triangular part of A is - not referenced. 
+ } - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). +/* Undo scaling if necessary */ - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, - WORK is not referenced. +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } - ===================================================================== +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. */ + if (jtot == nmaxit) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.f) { + ++(*info); + } +/* L150: */ + } + return 0; + } + goto L10; - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { +/* Order eigenvalues and eigenvectors. */ -/* Find max(abs(A(i,j))). 
*/ +L160: + if (icompz == 0) { - value = 0.; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( - d__1)); - value = max(d__2,d__3); -/* L10: */ - } -/* L20: */ - } - } else { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( - d__1)); - value = max(d__2,d__3); -/* L30: */ - } -/* L40: */ - } - } - } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { +/* Use Quick Sort */ -/* Find normI(A) ( = norm1(A), since A is symmetric). */ + slasrt_("I", n, &d__[1], info); - value = 0.; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; - i__2 = j - 1; - for (i__ = 1; i__ <= i__2; ++i__) { - absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); - sum += absa; - work[i__] += absa; -/* L50: */ - } - work[j] = sum + (d__1 = a[j + j * a_dim1], abs(d__1)); -/* L60: */ - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L70: */ - } - } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L80: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = work[j] + (d__1 = a[j + j * a_dim1], abs(d__1)); - i__2 = *n; - for (i__ = j + 1; i__ <= i__2; ++i__) { - absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); - sum += absa; - work[i__] += absa; -/* L90: */ - } - value = max(value,sum); -/* L100: */ - } - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + } else { -/* Find normF(A). 
*/ +/* Use Selection Sort to minimize swaps of eigenvectors */ - scale = 0.; - sum = 1.; - if (lsame_(uplo, "U")) { - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - i__2 = j - 1; - dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L110: */ + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ } - } else { - i__1 = *n - 1; - for (j = 1; j <= i__1; ++j) { - i__2 = *n - j; - dlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); -/* L120: */ + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); } +/* L180: */ } - sum *= 2; - i__1 = *lda + 1; - dlassq_(n, &a[a_offset], &i__1, &scale, &sum); - value = scale * sqrt(sum); } + return 0; - ret_val = value; - return ret_val; - -/* End of DLANSY */ +/* End of CSTEQR */ -} /* dlansy_ */ +} /* csteqr_ */ -/* Subroutine */ int dlanv2_(doublereal *a, doublereal *b, doublereal *c__, - doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r, - doublereal *rt2i, doublereal *cs, doublereal *sn) +/* Subroutine */ int ctrevc_(char *side, char *howmny, logical *select, + integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, + complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, + real *rwork, integer *info) { /* System generated locals */ - doublereal d__1, d__2; + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4, i__5; + real r__1, r__2, r__3; + complex q__1, q__2; /* Builtin functions */ - double d_sign(doublereal *, doublereal *), sqrt(doublereal); + double r_imag(complex *); + void r_cnjg(complex *, complex *); /* Local variables */ - static doublereal p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, - temp, scale, bcmax, bcmis, sigma; - + static integer i__, j, k, ii, ki, is; + static real ulp; + static logical 
allv; + static real unfl, ovfl, smin; + static logical over; + static real scale; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * + , complex *, integer *, complex *, integer *, complex *, complex * + , integer *); + static real remax; + extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, + complex *, integer *); + static logical leftv, bothv, somev; + extern /* Subroutine */ int slabad_(real *, real *); + extern integer icamax_(integer *, complex *, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *), xerbla_(char *, integer *), clatrs_(char *, char *, + char *, char *, integer *, complex *, integer *, complex *, real * + , real *, integer *); + extern doublereal scasum_(integer *, complex *, integer *); + static logical rightv; + static real smlnum; /* - -- LAPACK driver routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 @@ -16643,1853 +19927,58533 @@ doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer Purpose ======= - DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric - matrix in standard form: + CTREVC computes some or all of the right and/or left eigenvectors of + a complex upper triangular matrix T. - [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] - [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] + The right eigenvector x and the left eigenvector y of T corresponding + to an eigenvalue w are defined by: - where either - 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or - 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex - conjugate eigenvalues. + T*x = w*x, y'*T = w*y' - Arguments - ========= + where y' denotes the conjugate transpose of the vector y. 
- A (input/output) DOUBLE PRECISION - B (input/output) DOUBLE PRECISION - C (input/output) DOUBLE PRECISION - D (input/output) DOUBLE PRECISION - On entry, the elements of the input matrix. - On exit, they are overwritten by the elements of the - standardised Schur form. + If all eigenvectors are requested, the routine may either return the + matrices X and/or Y of right or left eigenvectors of T, or the + products Q*X and/or Q*Y, where Q is an input unitary + matrix. If T was obtained from the Schur factorization of an + original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of + right or left eigenvectors of A. - RT1R (output) DOUBLE PRECISION - RT1I (output) DOUBLE PRECISION - RT2R (output) DOUBLE PRECISION - RT2I (output) DOUBLE PRECISION - The real and imaginary parts of the eigenvalues. If the - eigenvalues are a complex conjugate pair, RT1I > 0. + Arguments + ========= - CS (output) DOUBLE PRECISION - SN (output) DOUBLE PRECISION - Parameters of the rotation matrix. + SIDE (input) CHARACTER*1 + = 'R': compute right eigenvectors only; + = 'L': compute left eigenvectors only; + = 'B': compute both right and left eigenvectors. - Further Details - =============== + HOWMNY (input) CHARACTER*1 + = 'A': compute all right and/or left eigenvectors; + = 'B': compute all right and/or left eigenvectors, + and backtransform them using the input matrices + supplied in VR and/or VL; + = 'S': compute selected right and/or left eigenvectors, + specified by the logical array SELECT. - Modified by V. Sima, Research Institute for Informatics, Bucharest, - Romania, to reduce the risk of cancellation errors, - when computing real eigenvalues, and to ensure, if possible, that - abs(RT1R) >= abs(RT2R). + SELECT (input) LOGICAL array, dimension (N) + If HOWMNY = 'S', SELECT specifies the eigenvectors to be + computed. + If HOWMNY = 'A' or 'B', SELECT is not referenced. + To select the eigenvector corresponding to the j-th + eigenvalue, SELECT(j) must be set to .TRUE.. 
- ===================================================================== -*/ + N (input) INTEGER + The order of the matrix T. N >= 0. + T (input/output) COMPLEX array, dimension (LDT,N) + The upper triangular matrix T. T is modified, but restored + on exit. - eps = PRECISION; - if (*c__ == 0.) { - *cs = 1.; - *sn = 0.; - goto L10; + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). - } else if (*b == 0.) { + VL (input/output) COMPLEX array, dimension (LDVL,MM) + On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by CHSEQR). + On exit, if SIDE = 'L' or 'B', VL contains: + if HOWMNY = 'A', the matrix Y of left eigenvectors of T; + VL is lower triangular. The i-th column + VL(i) of VL is the eigenvector corresponding + to T(i,i). + if HOWMNY = 'B', the matrix Q*Y; + if HOWMNY = 'S', the left eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VL, in the same order as their + eigenvalues. + If SIDE = 'R', VL is not referenced. -/* Swap rows and columns */ + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= max(1,N) if + SIDE = 'L' or 'B'; LDVL >= 1 otherwise. - *cs = 0.; - *sn = 1.; - temp = *d__; - *d__ = *a; - *a = temp; - *b = -(*c__); - *c__ = 0.; - goto L10; - } else if (*a - *d__ == 0. && d_sign(&c_b15, b) != d_sign(&c_b15, c__)) { - *cs = 1.; - *sn = 0.; - goto L10; - } else { + VR (input/output) COMPLEX array, dimension (LDVR,MM) + On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by CHSEQR). + On exit, if SIDE = 'R' or 'B', VR contains: + if HOWMNY = 'A', the matrix X of right eigenvectors of T; + VR is upper triangular. The i-th column + VR(i) of VR is the eigenvector corresponding + to T(i,i). 
+ if HOWMNY = 'B', the matrix Q*X; + if HOWMNY = 'S', the right eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VR, in the same order as their + eigenvalues. + If SIDE = 'L', VR is not referenced. - temp = *a - *d__; - p = temp * .5; -/* Computing MAX */ - d__1 = abs(*b), d__2 = abs(*c__); - bcmax = max(d__1,d__2); -/* Computing MIN */ - d__1 = abs(*b), d__2 = abs(*c__); - bcmis = min(d__1,d__2) * d_sign(&c_b15, b) * d_sign(&c_b15, c__); -/* Computing MAX */ - d__1 = abs(p); - scale = max(d__1,bcmax); - z__ = p / scale * p + bcmax / scale * bcmis; + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= max(1,N) if + SIDE = 'R' or 'B'; LDVR >= 1 otherwise. -/* - If Z is of the order of the machine accuracy, postpone the - decision on the nature of eigenvalues -*/ + MM (input) INTEGER + The number of columns in the arrays VL and/or VR. MM >= M. - if (z__ >= eps * 4.) { + M (output) INTEGER + The number of columns in the arrays VL and/or VR actually + used to store the eigenvectors. If HOWMNY = 'A' or 'B', M + is set to N. Each selected eigenvector occupies one + column. -/* Real eigenvalues. Compute A and D. */ + WORK (workspace) COMPLEX array, dimension (2*N) - d__1 = sqrt(scale) * sqrt(z__); - z__ = p + d_sign(&d__1, &p); - *a = *d__ + z__; - *d__ -= bcmax / z__ * bcmis; + RWORK (workspace) REAL array, dimension (N) -/* Compute B and the rotation matrix */ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value - tau = dlapy2_(c__, &z__); - *cs = z__ / tau; - *sn = *c__ / tau; - *b -= *c__; - *c__ = 0.; - } else { + Further Details + =============== -/* - Complex eigenvalues, or real (almost) equal eigenvalues. - Make diagonal elements equal. -*/ + The algorithm used in this program is basically backward (forward) + substitution, with scaling to make the the code robust against + possible overflow. 
- sigma = *b + *c__; - tau = dlapy2_(&sigma, &temp); - *cs = sqrt((abs(sigma) / tau + 1.) * .5); - *sn = -(p / (tau * *cs)) * d_sign(&c_b15, &sigma); + Each eigenvector is normalized so that the element of largest + magnitude has magnitude 1; here the magnitude of a complex number + (x,y) is taken to be |x| + |y|. -/* - Compute [ AA BB ] = [ A B ] [ CS -SN ] - [ CC DD ] [ C D ] [ SN CS ] -*/ + ===================================================================== - aa = *a * *cs + *b * *sn; - bb = -(*a) * *sn + *b * *cs; - cc = *c__ * *cs + *d__ * *sn; - dd = -(*c__) * *sn + *d__ * *cs; -/* - Compute [ A B ] = [ CS SN ] [ AA BB ] - [ C D ] [-SN CS ] [ CC DD ] + Decode and test the input parameters */ - *a = aa * *cs + cc * *sn; - *b = bb * *cs + dd * *sn; - *c__ = -aa * *sn + cc * *cs; - *d__ = -bb * *sn + dd * *cs; + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + --rwork; - temp = (*a + *d__) * .5; - *a = temp; - *d__ = temp; + /* Function Body */ + bothv = lsame_(side, "B"); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; - if (*c__ != 0.) { - if (*b != 0.) { - if (d_sign(&c_b15, b) == d_sign(&c_b15, c__)) { + allv = lsame_(howmny, "A"); + over = lsame_(howmny, "B"); + somev = lsame_(howmny, "S"); -/* Real eigenvalues: reduce to upper triangular form */ +/* + Set M to the number of columns required to store the selected + eigenvectors. +*/ - sab = sqrt((abs(*b))); - sac = sqrt((abs(*c__))); - d__1 = sab * sac; - p = d_sign(&d__1, c__); - tau = 1. 
/ sqrt((d__1 = *b + *c__, abs(d__1))); - *a = temp + p; - *d__ = temp - p; - *b -= *c__; - *c__ = 0.; - cs1 = sab * tau; - sn1 = sac * tau; - temp = *cs * cs1 - *sn * sn1; - *sn = *cs * sn1 + *sn * cs1; - *cs = temp; - } - } else { - *b = -(*c__); - *c__ = 0.; - temp = *cs; - *cs = -(*sn); - *sn = temp; - } + if (somev) { + *m = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (select[j]) { + ++(*m); } +/* L10: */ } - + } else { + *m = *n; } -L10: - -/* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */ - - *rt1r = *a; - *rt2r = *d__; - if (*c__ == 0.) { - *rt1i = 0.; - *rt2i = 0.; - } else { - *rt1i = sqrt((abs(*b))) * sqrt((abs(*c__))); - *rt2i = -(*rt1i); + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! allv && ! over && ! somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else if (*mm < *m) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CTREVC", &i__1); + return 0; } - return 0; -/* End of DLANV2 */ +/* Quick return if possible. */ -} /* dlanv2_ */ + if (*n == 0) { + return 0; + } -doublereal dlapy2_(doublereal *x, doublereal *y) -{ - /* System generated locals */ - doublereal ret_val, d__1; +/* Set the constants to control overflow. */ - /* Builtin functions */ - double sqrt(doublereal); + unfl = slamch_("Safe minimum"); + ovfl = 1.f / unfl; + slabad_(&unfl, &ovfl); + ulp = slamch_("Precision"); + smlnum = unfl * (*n / ulp); - /* Local variables */ - static doublereal w, z__, xabs, yabs; +/* Store the diagonal elements of T in working array WORK. */ + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + *n; + i__3 = i__ + i__ * t_dim1; + work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i; +/* L20: */ + } /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - + Compute 1-norm of each column of strictly upper triangular + part of T to control overflow in triangular solver. +*/ - Purpose - ======= + rwork[1] = 0.f; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + rwork[j] = scasum_(&i__2, &t[j * t_dim1 + 1], &c__1); +/* L30: */ + } - DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary - overflow. + if (rightv) { - Arguments - ========= +/* Compute right eigenvectors. */ - X (input) DOUBLE PRECISION - Y (input) DOUBLE PRECISION - X and Y specify the values x and y. + is = *m; + for (ki = *n; ki >= 1; --ki) { - ===================================================================== + if (somev) { + if (! select[ki]) { + goto L80; + } + } +/* Computing MAX */ + i__1 = ki + ki * t_dim1; + r__3 = ulp * ((r__1 = t[i__1].r, dabs(r__1)) + (r__2 = r_imag(&t[ + ki + ki * t_dim1]), dabs(r__2))); + smin = dmax(r__3,smlnum); + + work[1].r = 1.f, work[1].i = 0.f; + +/* Form right-hand side. */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k; + i__3 = k + ki * t_dim1; + q__1.r = -t[i__3].r, q__1.i = -t[i__3].i; + work[i__2].r = q__1.r, work[i__2].i = q__1.i; +/* L40: */ + } + +/* + Solve the triangular system: + (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. */ + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + k * t_dim1; + i__4 = ki + ki * t_dim1; + q__1.r = t[i__3].r - t[i__4].r, q__1.i = t[i__3].i - t[i__4] + .i; + t[i__2].r = q__1.r, t[i__2].i = q__1.i; + i__2 = k + k * t_dim1; + if ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k * + t_dim1]), dabs(r__2)) < smin) { + i__3 = k + k * t_dim1; + t[i__3].r = smin, t[i__3].i = 0.f; + } +/* L50: */ + } - xabs = abs(*x); - yabs = abs(*y); - w = max(xabs,yabs); - z__ = min(xabs,yabs); - if (z__ == 0.) 
{ - ret_val = w; - } else { -/* Computing 2nd power */ - d__1 = z__ / w; - ret_val = w * sqrt(d__1 * d__1 + 1.); + if (ki > 1) { + i__1 = ki - 1; + clatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[ + t_offset], ldt, &work[1], &scale, &rwork[1], info); + i__1 = ki; + work[i__1].r = scale, work[i__1].i = 0.f; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! over) { + ccopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1); + + ii = icamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + i__1 = ii + is * vr_dim1; + remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 = + r_imag(&vr[ii + is * vr_dim1]), dabs(r__2))); + csscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + i__2 = k + is * vr_dim1; + vr[i__2].r = 0.f, vr[i__2].i = 0.f; +/* L60: */ + } + } else { + if (ki > 1) { + i__1 = ki - 1; + q__1.r = scale, q__1.i = 0.f; + cgemv_("N", n, &i__1, &c_b56, &vr[vr_offset], ldvr, &work[ + 1], &c__1, &q__1, &vr[ki * vr_dim1 + 1], &c__1); + } + + ii = icamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + i__1 = ii + ki * vr_dim1; + remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 = + r_imag(&vr[ii + ki * vr_dim1]), dabs(r__2))); + csscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + +/* Set back the original diagonal elements of T. */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + *n; + t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i; +/* L70: */ + } + + --is; +L80: + ; + } } - return ret_val; -/* End of DLAPY2 */ + if (leftv) { -} /* dlapy2_ */ +/* Compute left eigenvectors. */ -doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) -{ - /* System generated locals */ - doublereal ret_val, d__1, d__2, d__3; + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { - /* Builtin functions */ - double sqrt(doublereal); + if (somev) { + if (! 
select[ki]) { + goto L130; + } + } +/* Computing MAX */ + i__2 = ki + ki * t_dim1; + r__3 = ulp * ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[ + ki + ki * t_dim1]), dabs(r__2))); + smin = dmax(r__3,smlnum); - /* Local variables */ - static doublereal w, xabs, yabs, zabs; + i__2 = *n; + work[i__2].r = 1.f, work[i__2].i = 0.f; + +/* Form right-hand side. */ + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k; + r_cnjg(&q__2, &t[ki + k * t_dim1]); + q__1.r = -q__2.r, q__1.i = -q__2.i; + work[i__3].r = q__1.r, work[i__3].i = q__1.i; +/* L90: */ + } /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + Solve the triangular system: + (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. +*/ + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + k * t_dim1; + i__5 = ki + ki * t_dim1; + q__1.r = t[i__4].r - t[i__5].r, q__1.i = t[i__4].i - t[i__5] + .i; + t[i__3].r = q__1.r, t[i__3].i = q__1.i; + i__3 = k + k * t_dim1; + if ((r__1 = t[i__3].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k * + t_dim1]), dabs(r__2)) < smin) { + i__4 = k + k * t_dim1; + t[i__4].r = smin, t[i__4].i = 0.f; + } +/* L100: */ + } - Purpose - ======= + if (ki < *n) { + i__2 = *n - ki; + clatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", & + i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki + + 1], &scale, &rwork[1], info); + i__2 = ki; + work[i__2].r = scale, work[i__2].i = 0.f; + } - DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause - unnecessary overflow. +/* Copy the vector x or Q*x to VL and normalize. */ - Arguments - ========= + if (! over) { + i__2 = *n - ki + 1; + ccopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1) + ; - X (input) DOUBLE PRECISION - Y (input) DOUBLE PRECISION - Z (input) DOUBLE PRECISION - X, Y and Z specify the values x, y and z. 
+ i__2 = *n - ki + 1; + ii = icamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; + i__2 = ii + is * vl_dim1; + remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 = + r_imag(&vl[ii + is * vl_dim1]), dabs(r__2))); + i__2 = *n - ki + 1; + csscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - ===================================================================== -*/ + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + is * vl_dim1; + vl[i__3].r = 0.f, vl[i__3].i = 0.f; +/* L110: */ + } + } else { + if (ki < *n) { + i__2 = *n - ki; + q__1.r = scale, q__1.i = 0.f; + cgemv_("N", n, &i__2, &c_b56, &vl[(ki + 1) * vl_dim1 + 1], + ldvl, &work[ki + 1], &c__1, &q__1, &vl[ki * + vl_dim1 + 1], &c__1); + } + ii = icamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + i__2 = ii + ki * vl_dim1; + remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 = + r_imag(&vl[ii + ki * vl_dim1]), dabs(r__2))); + csscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + } - xabs = abs(*x); - yabs = abs(*y); - zabs = abs(*z__); -/* Computing MAX */ - d__1 = max(xabs,yabs); - w = max(d__1,zabs); - if (w == 0.) { - ret_val = 0.; - } else { -/* Computing 2nd power */ - d__1 = xabs / w; -/* Computing 2nd power */ - d__2 = yabs / w; -/* Computing 2nd power */ - d__3 = zabs / w; - ret_val = w * sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3); +/* Set back the original diagonal elements of T. 
*/ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + *n; + t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i; +/* L120: */ + } + + ++is; +L130: + ; + } } - return ret_val; -/* End of DLAPY3 */ + return 0; -} /* dlapy3_ */ +/* End of CTREVC */ -/* Subroutine */ int dlarf_(char *side, integer *m, integer *n, doublereal *v, - integer *incv, doublereal *tau, doublereal *c__, integer *ldc, - doublereal *work) +} /* ctrevc_ */ + +/* Subroutine */ int ctrti2_(char *uplo, char *diag, integer *n, complex *a, + integer *lda, integer *info) { /* System generated locals */ - integer c_dim1, c_offset; - doublereal d__1; + integer a_dim1, a_offset, i__1, i__2; + complex q__1; + + /* Builtin functions */ + void c_div(complex *, complex *, complex *); /* Local variables */ - extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, + static integer j; + static complex ajj; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, + complex *, integer *, complex *, integer *), xerbla_(char *, integer *); + static logical nounit; /* - -- LAPACK auxiliary routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + September 30, 1994 Purpose ======= - DLARF applies a real elementary reflector H to a real m by n matrix - C, from either the left or the right. H is represented in the form - - H = I - tau * v * v' - - where tau is a real scalar and v is a real vector. 
+ CTRTI2 computes the inverse of a complex upper or lower triangular + matrix. - If tau = 0, then H is taken to be the unit matrix. + This is the Level 2 BLAS version of the algorithm. Arguments ========= - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular - M (input) INTEGER - The number of rows of the matrix C. + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular N (input) INTEGER - The number of columns of the matrix C. + The order of the matrix A. N >= 0. - V (input) DOUBLE PRECISION array, dimension - (1 + (M-1)*abs(INCV)) if SIDE = 'L' - or (1 + (N-1)*abs(INCV)) if SIDE = 'R' - The vector v in the representation of H. V is not used if - TAU = 0. + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading n by n upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. - INCV (input) INTEGER - The increment between elements of v. INCV <> 0. + On exit, the (triangular) inverse of the original matrix, in + the same storage format. - TAU (input) DOUBLE PRECISION - The value tau in the representation of H. + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. 
+ INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value - LDC (input) INTEGER - The leading dimension of the array C. LDC >= max(1,M). + ===================================================================== - WORK (workspace) DOUBLE PRECISION array, dimension - (N) if SIDE = 'L' - or (M) if SIDE = 'R' - ===================================================================== + Test the input parameters. */ - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; /* Function Body */ - if (lsame_(side, "L")) { - -/* Form H * C */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CTRTI2", &i__1); + return 0; + } - if (*tau != 0.) { + if (upper) { -/* w := C' * v */ +/* Compute inverse of upper triangular matrix. */ - dgemv_("Transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], - incv, &c_b29, &work[1], &c__1); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + i__2 = j + j * a_dim1; + c_div(&q__1, &c_b56, &a[j + j * a_dim1]); + a[i__2].r = q__1.r, a[i__2].i = q__1.i; + i__2 = j + j * a_dim1; + q__1.r = -a[i__2].r, q__1.i = -a[i__2].i; + ajj.r = q__1.r, ajj.i = q__1.i; + } else { + q__1.r = -1.f, q__1.i = -0.f; + ajj.r = q__1.r, ajj.i = q__1.i; + } -/* C := C - v * w' */ +/* Compute elements 1:j-1 of j-th column. 
*/ - d__1 = -(*tau); - dger_(m, n, &d__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], - ldc); + i__2 = j - 1; + ctrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + cscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ } } else { -/* Form C * H */ - - if (*tau != 0.) { - -/* w := C * v */ +/* Compute inverse of lower triangular matrix. */ - dgemv_("No transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], - incv, &c_b29, &work[1], &c__1); + for (j = *n; j >= 1; --j) { + if (nounit) { + i__1 = j + j * a_dim1; + c_div(&q__1, &c_b56, &a[j + j * a_dim1]); + a[i__1].r = q__1.r, a[i__1].i = q__1.i; + i__1 = j + j * a_dim1; + q__1.r = -a[i__1].r, q__1.i = -a[i__1].i; + ajj.r = q__1.r, ajj.i = q__1.i; + } else { + q__1.r = -1.f, q__1.i = -0.f; + ajj.r = q__1.r, ajj.i = q__1.i; + } + if (j < *n) { -/* C := C - w * v' */ +/* Compute elements j+1:n of j-th column. */ - d__1 = -(*tau); - dger_(m, n, &d__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], - ldc); + i__1 = *n - j; + ctrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); + i__1 = *n - j; + cscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ } } + return 0; -/* End of DLARF */ +/* End of CTRTI2 */ -} /* dlarf_ */ +} /* ctrti2_ */ -/* Subroutine */ int dlarfb_(char *side, char *trans, char *direct, char * - storev, integer *m, integer *n, integer *k, doublereal *v, integer * - ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, - doublereal *work, integer *ldwork) +/* Subroutine */ int ctrtri_(char *uplo, char *diag, integer *n, complex *a, + integer *lda, integer *info) { /* System generated locals */ - integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, - work_offset, i__1, i__2; + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; + complex q__1; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int 
s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ - static integer i__, j; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); + static integer j, jb, nb, nn; extern logical lsame_(char *, char *); - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dtrmm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *); - static char transt[1]; + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), ctrsm_(char *, char *, + char *, char *, integer *, integer *, complex *, complex *, + integer *, complex *, integer *); + static logical upper; + extern /* Subroutine */ int ctrti2_(char *, char *, integer *, complex *, + integer *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical nounit; /* - -- LAPACK auxiliary routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + September 30, 1994 Purpose ======= - DLARFB applies a real block reflector H or its transpose H' to a - real m by n matrix C, from either the left or the right. + CTRTRI computes the inverse of a complex upper or lower triangular + matrix A. + + This is the Level 3 BLAS version of the algorithm. 
Arguments ========= - SIDE (input) CHARACTER*1 - = 'L': apply H or H' from the Left - = 'R': apply H or H' from the Right - - TRANS (input) CHARACTER*1 - = 'N': apply H (No transpose) - = 'T': apply H' (Transpose) - - DIRECT (input) CHARACTER*1 - Indicates how H is formed from a product of elementary - reflectors - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Indicates how the vectors which define the elementary - reflectors are stored: - = 'C': Columnwise - = 'R': Rowwise + UPLO (input) CHARACTER*1 + = 'U': A is upper triangular; + = 'L': A is lower triangular. - M (input) INTEGER - The number of rows of the matrix C. + DIAG (input) CHARACTER*1 + = 'N': A is non-unit triangular; + = 'U': A is unit triangular. N (input) INTEGER - The number of columns of the matrix C. - - K (input) INTEGER - The order of the matrix T (= the number of elementary - reflectors whose product defines the block reflector). - - V (input) DOUBLE PRECISION array, dimension - (LDV,K) if STOREV = 'C' - (LDV,M) if STOREV = 'R' and SIDE = 'L' - (LDV,N) if STOREV = 'R' and SIDE = 'R' - The matrix V. See further details. - - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); - if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); - if STOREV = 'R', LDV >= K. - - T (input) DOUBLE PRECISION array, dimension (LDT,K) - The triangular k by k matrix T in the representation of the - block reflector. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by H*C or H'*C or C*H or C*H'. + The order of the matrix A. N >= 0. - LDC (input) INTEGER - The leading dimension of the array C. LDA >= max(1,M). + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the triangular matrix A. 
If UPLO = 'U', the + leading N-by-N upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + On exit, the (triangular) inverse of the original matrix, in + the same storage format. - WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - LDWORK (input) INTEGER - The leading dimension of the array WORK. - If SIDE = 'L', LDWORK >= max(1,N); - if SIDE = 'R', LDWORK >= max(1,M). + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, A(i,i) is exactly zero. The triangular + matrix is singular and its inverse can not be computed. ===================================================================== - Quick return if possible + Test the input parameters. */ /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - work_dim1 = *ldwork; - work_offset = 1 + work_dim1; - work -= work_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; /* Function Body */ - if (*m <= 0 || *n <= 0) { + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! 
lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CTRTRI", &i__1); return 0; } - if (lsame_(trans, "N")) { - *(unsigned char *)transt = 'T'; - } else { - *(unsigned char *)transt = 'N'; +/* Quick return if possible */ + + if (*n == 0) { + return 0; } - if (lsame_(storev, "C")) { +/* Check for singularity if non-unit. */ - if (lsame_(direct, "F")) { + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (a[i__2].r == 0.f && a[i__2].i == 0.f) { + return 0; + } +/* L10: */ + } + *info = 0; + } /* - Let V = ( V1 ) (first K rows) - ( V2 ) - where V1 is unit lower triangular. + Determine the block size for this environment. + + Writing concatenation */ + i__3[0] = 1, a__1[0] = uplo; + i__3[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "CTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (nb <= 1 || nb >= *n) { - if (lsame_(side, "L")) { +/* Use unblocked code */ -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) + ctrti2_(uplo, diag, n, &a[a_offset], lda, info); + } else { - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +/* Use blocked code */ - W := C1' -*/ + if (upper) { - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); -/* L10: */ - } +/* Compute inverse of upper triangular matrix */ -/* W := W * V1 */ + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? 
j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); + +/* Compute rows 1:j-1 of current block column */ + + i__4 = j - 1; + ctrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & + c_b56, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + i__4 = j - 1; + q__1.r = -1.f, q__1.i = -0.f; + ctrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & + q__1, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda); - dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b15, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { +/* Compute inverse of current diagonal block */ -/* W := W + C2'*V2 */ + ctrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L20: */ + } + } else { - i__1 = *m - *k; - dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b15, & - c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], - ldv, &c_b15, &work[work_offset], ldwork); - } +/* Compute inverse of lower triangular matrix */ -/* W := W * T' or W * T */ + nn = (*n - 1) / nb * nb + 1; + i__2 = -nb; + for (j = nn; i__2 < 0 ? 
j >= 1 : j <= 1; j += i__2) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { - dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); +/* Compute rows j+jb:n of current block column */ -/* C := C - V * W' */ + i__1 = *n - j - jb + 1; + ctrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, + &c_b56, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + + jb + j * a_dim1], lda); + i__1 = *n - j - jb + 1; + q__1.r = -1.f, q__1.i = -0.f; + ctrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, + &q__1, &a[j + j * a_dim1], lda, &a[j + jb + j * + a_dim1], lda); + } - if (*m > *k) { +/* Compute inverse of current diagonal block */ -/* C2 := C2 - V2 * W' */ + ctrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L30: */ + } + } + } - i__1 = *m - *k; - dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b151, - &v[*k + 1 + v_dim1], ldv, &work[work_offset], - ldwork, &c_b15, &c__[*k + 1 + c_dim1], ldc); - } + return 0; -/* W := W * V1' */ +/* End of CTRTRI */ - dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b15, & - v[v_offset], ldv, &work[work_offset], ldwork); +} /* ctrtri_ */ -/* C1 := C1 - W' */ +/* Subroutine */ int cung2r_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1; - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; -/* L20: */ - } -/* L30: */ - } + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), clarf_(char *, integer *, integer *, complex *, + integer *, complex *, complex *, integer *, complex *), + xerbla_(char *, integer *); - } else if (lsame_(side, "R")) { /* - Form C * H or C * H' where C = ( C1 C2 ) + 
-- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 - W := C * V = (C1*V1 + C2*V2) (stored in WORK) - W := C1 -*/ + Purpose + ======= - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - work_dim1 + 1], &c__1); -/* L40: */ - } + CUNG2R generates an m by n complex matrix Q with orthonormal columns, + which is defined as the first n columns of a product of k elementary + reflectors of order m -/* W := W * V1 */ + Q = H(1) H(2) . . . H(k) - dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b15, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { + as returned by CGEQRF. -/* W := W + C2 * V2 */ + Arguments + ========= - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + - 1 + v_dim1], ldv, &c_b15, &work[work_offset], - ldwork); - } + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. -/* W := W * T or W * T' */ + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. - dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. -/* C := C - W * V' */ + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by CGEQRF in the first k columns of its array + argument A. + On exit, the m by n matrix Q. - if (*n > *k) { + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). -/* C2 := C2 - W * V2' */ + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQRF. 
- i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b151, - &work[work_offset], ldwork, &v[*k + 1 + v_dim1], - ldv, &c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc); - } + WORK (workspace) COMPLEX array, dimension (N) -/* W := W * V1' */ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value - dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b15, & - v[v_offset], ldv, &work[work_offset], ldwork); + ===================================================================== -/* C1 := C1 - W */ - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; -/* L50: */ - } -/* L60: */ - } - } + Test the input arguments +*/ - } else { + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; -/* - Let V = ( V1 ) - ( V2 ) (last K rows) - where V2 is unit upper triangular. -*/ + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNG2R", &i__1); + return 0; + } - if (lsame_(side, "L")) { +/* Quick return if possible */ -/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) + if (*n <= 0) { + return 0; + } - W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) +/* Initialise columns k+1:n to columns of the unit matrix */ - W := C2' -*/ + i__1 = *n; + for (j = *k + 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + i__3 = l + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; +/* L20: */ + } - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); -/* L70: */ - } + for (i__ = *k; i__ >= 1; --i__) { -/* W := W * V2 */ 
+/* Apply H(i) to A(i:m,i:n) from the left */ - dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b15, - &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*m > *k) { + if (i__ < *n) { + i__1 = i__ + i__ * a_dim1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + i__1 = *m - i__ + 1; + i__2 = *n - i__; + clarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } + if (i__ < *m) { + i__1 = *m - i__; + i__2 = i__; + q__1.r = -tau[i__2].r, q__1.i = -tau[i__2].i; + cscal_(&i__1, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + } + i__1 = i__ + i__ * a_dim1; + i__2 = i__; + q__1.r = 1.f - tau[i__2].r, q__1.i = 0.f - tau[i__2].i; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; -/* W := W + C1'*V1 */ +/* Set A(1:i-1,i) to zero */ - i__1 = *m - *k; - dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b15, & - c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & - work[work_offset], ldwork); - } + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + i__2 = l + i__ * a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L30: */ + } +/* L40: */ + } + return 0; -/* W := W * T' or W * T */ +/* End of CUNG2R */ - dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); +} /* cung2r_ */ -/* C := C - V * W' */ +/* Subroutine */ int cungbr_(char *vect, integer *m, integer *n, integer *k, + complex *a, integer *lda, complex *tau, complex *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; - if (*m > *k) { + /* Local variables */ + static integer i__, j, nb, mn; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cunglq_(integer *, integer *, integer *, + complex *, integer 
*, complex *, complex *, integer *, integer *), + cungqr_(integer *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *, integer *); + static integer lwkopt; + static logical lquery; -/* C1 := C1 - V1 * W' */ - i__1 = *m - *k; - dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b151, - &v[v_offset], ldv, &work[work_offset], ldwork, & - c_b15, &c__[c_offset], ldc) - ; - } +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 -/* W := W * V2' */ - dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b15, & - v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); + Purpose + ======= -/* C2 := C2 - W' */ + CUNGBR generates one of the complex unitary matrices Q or P**H + determined by CGEBRD when reducing a complex matrix A to bidiagonal + form: A = Q * B * P**H. Q and P**H are defined as products of + elementary reflectors H(i) or G(i) respectively. - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * - work_dim1]; -/* L80: */ - } -/* L90: */ - } + If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + is of order M: + if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n + columns of Q, where m >= n >= k; + if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an + M-by-M matrix. - } else if (lsame_(side, "R")) { + If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H + is of order N: + if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m + rows of P**H, where n >= m >= k; + if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as + an N-by-N matrix. 
-/* - Form C * H or C * H' where C = ( C1 C2 ) + Arguments + ========= - W := C * V = (C1*V1 + C2*V2) (stored in WORK) + VECT (input) CHARACTER*1 + Specifies whether the matrix Q or the matrix P**H is + required, as defined in the transformation applied by CGEBRD: + = 'Q': generate Q; + = 'P': generate P**H. - W := C2 -*/ + M (input) INTEGER + The number of rows of the matrix Q or P**H to be returned. + M >= 0. - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); -/* L100: */ - } + N (input) INTEGER + The number of columns of the matrix Q or P**H to be returned. + N >= 0. + If VECT = 'Q', M >= N >= min(M,K); + if VECT = 'P', N >= M >= min(N,K). -/* W := W * V2 */ + K (input) INTEGER + If VECT = 'Q', the number of columns in the original M-by-K + matrix reduced by CGEBRD. + If VECT = 'P', the number of rows in the original K-by-N + matrix reduced by CGEBRD. + K >= 0. - dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b15, - &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*n > *k) { + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by CGEBRD. + On exit, the M-by-N matrix Q or P**H. -/* W := W + C1 * V1 */ + LDA (input) INTEGER + The leading dimension of the array A. LDA >= M. - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b15, &work[work_offset], ldwork); - } + TAU (input) COMPLEX array, dimension + (min(M,K)) if VECT = 'Q' + (min(N,K)) if VECT = 'P' + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i), which determines Q or P**H, as + returned by CGEBRD in its array argument TAUQ or TAUP. -/* W := W * T or W * T' */ + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
- dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,min(M,N)). + For optimum performance LWORK >= min(M,N)*NB, where NB + is the optimal blocksize. -/* C := C - W * V' */ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. - if (*n > *k) { + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value -/* C1 := C1 - W * V1' */ + ===================================================================== - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b151, - &work[work_offset], ldwork, &v[v_offset], ldv, & - c_b15, &c__[c_offset], ldc) - ; - } -/* W := W * V2' */ + Test the input arguments +*/ - dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b15, & - v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; -/* C2 := C2 - W */ + /* Function Body */ + *info = 0; + wantq = lsame_(vect, "Q"); + mn = min(*m,*n); + lquery = *lwork == -1; + if (! wantq && ! lsame_(vect, "P")) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { + *info = -3; + } else if (*k < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*lwork < max(1,mn) && ! 
lquery) { + *info = -9; + } - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * - work_dim1]; -/* L110: */ - } -/* L120: */ - } - } + if (*info == 0) { + if (wantq) { + nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); } + lwkopt = max(1,mn) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } - } else if (lsame_(storev, "R")) { + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } - if (lsame_(direct, "F")) { +/* Quick return if possible */ -/* - Let V = ( V1 V2 ) (V1: first K columns) - where V1 is unit upper triangular. -*/ + if (*m == 0 || *n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } - if (lsame_(side, "L")) { + if (wantq) { /* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) - - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) - - W := C1' + Form Q, determined by a call to CGEBRD to reduce an m-by-k + matrix */ - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); -/* L130: */ - } - -/* W := W * V1' */ - - dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b15, & - v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { + if (*m >= *k) { -/* W := W + C2'*V2' */ +/* If m >= k, assume m >= n >= k */ - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b15, & - c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + - 1], ldv, &c_b15, &work[work_offset], ldwork); - } + cungqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); -/* W := W * T' or W * T */ + } else { - dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); +/* + If m < k, assume m = n -/* C := C - V' * W' */ + Shift the vectors which define the 
elementary reflectors one + column to the right, and set the first row and column of Q + to those of the unit matrix +*/ - if (*m > *k) { + for (j = *m; j >= 2; --j) { + i__1 = j * a_dim1 + 1; + a[i__1].r = 0.f, a[i__1].i = 0.f; + i__1 = *m; + for (i__ = j + 1; i__ <= i__1; ++i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__ + (j - 1) * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; +/* L10: */ + } +/* L20: */ + } + i__1 = a_dim1 + 1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + i__1 = *m; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__ + a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L30: */ + } + if (*m > 1) { -/* C2 := C2 - V2' * W' */ +/* Form Q(2:m,2:m) */ - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b151, &v[ - (*k + 1) * v_dim1 + 1], ldv, &work[work_offset], - ldwork, &c_b15, &c__[*k + 1 + c_dim1], ldc); - } + i__1 = *m - 1; + i__2 = *m - 1; + i__3 = *m - 1; + cungqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } else { -/* W := W * V1 */ +/* + Form P', determined by a call to CGEBRD to reduce a k-by-n + matrix +*/ - dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b15, - &v[v_offset], ldv, &work[work_offset], ldwork); + if (*k < *n) { -/* C1 := C1 - W' */ +/* If k < n, assume k <= m <= n */ - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; -/* L140: */ - } -/* L150: */ - } + cunglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); - } else if (lsame_(side, "R")) { + } else { /* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + If k >= n, assume m = n - W := C1 + Shift the vectors which define the elementary reflectors one + row downward, and set the first row and column of P' to + those of the unit matrix */ - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * - 
work_dim1 + 1], &c__1); -/* L160: */ + i__1 = a_dim1 + 1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__ + a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L40: */ + } + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + for (i__ = j - 1; i__ >= 2; --i__) { + i__2 = i__ + j * a_dim1; + i__3 = i__ - 1 + j * a_dim1; + a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i; +/* L50: */ } + i__2 = j * a_dim1 + 1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L60: */ + } + if (*n > 1) { -/* W := W * V1' */ +/* Form P'(2:n,2:n) */ - dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b15, & - v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + cunglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; -/* W := W + C2 * V2' */ +/* End of CUNGBR */ - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b15, & - c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) * - v_dim1 + 1], ldv, &c_b15, &work[work_offset], - ldwork); - } +} /* cungbr_ */ -/* W := W * T or W * T' */ +/* Subroutine */ int cunghr_(integer *n, integer *ilo, integer *ihi, complex * + a, integer *lda, complex *tau, complex *work, integer *lwork, integer + *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + /* Local variables */ + static integer i__, j, nb, nh, iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cungqr_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *); + static integer lwkopt; + static logical lquery; -/* C := C - W * V */ 
- if (*n > *k) { +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 -/* C2 := C2 - W * V2 */ - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, &i__1, k, & - c_b151, &work[work_offset], ldwork, &v[(*k + 1) * - v_dim1 + 1], ldv, &c_b15, &c__[(*k + 1) * c_dim1 - + 1], ldc); - } + Purpose + ======= -/* W := W * V1 */ + CUNGHR generates a complex unitary matrix Q which is defined as the + product of IHI-ILO elementary reflectors of order N, as returned by + CGEHRD: - dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b15, - &v[v_offset], ldv, &work[work_offset], ldwork); + Q = H(ilo) H(ilo+1) . . . H(ihi-1). -/* C1 := C1 - W */ + Arguments + ========= - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; -/* L170: */ - } -/* L180: */ - } + N (input) INTEGER + The order of the matrix Q. N >= 0. - } + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of CGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - } else { + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by CGEHRD. + On exit, the N-by-N unitary matrix Q. -/* - Let V = ( V1 V2 ) (V2: last K columns) - where V2 is unit lower triangular. -*/ + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - if (lsame_(side, "L")) { + TAU (input) COMPLEX array, dimension (N-1) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEHRD. 
-/* - Form H * C or H' * C where C = ( C1 ) - ( C2 ) + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= IHI-ILO. + For optimum performance LWORK >= (IHI-ILO)*NB, where NB is + the optimal blocksize. - W := C2' -*/ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); -/* L190: */ - } + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value -/* W := W * V2' */ + ===================================================================== - dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b15, & - v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] - , ldwork); - if (*m > *k) { -/* W := W + C1'*V1' */ + Test the input arguments +*/ - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b15, & - c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & - work[work_offset], ldwork); - } - -/* W := W * T' or W * T */ - - dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - V' * W' */ - - if (*m > *k) { - -/* C1 := C1 - V1' * W' */ - - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b151, &v[ - v_offset], ldv, &work[work_offset], ldwork, & - c_b15, &c__[c_offset], ldc); - } + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; -/* W := W * V2 */ + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo 
> max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,nh) && ! lquery) { + *info = -8; + } - dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b15, - &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); + if (*info == 0) { + nb = ilaenv_(&c__1, "CUNGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = max(1,nh) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } -/* C2 := C2 - W' */ + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGHR", &i__1); + return 0; + } else if (lquery) { + return 0; + } - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * - work_dim1]; -/* L200: */ - } -/* L210: */ - } +/* Quick return if possible */ - } else if (lsame_(side, "R")) { + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } /* - Form C * H or C * H' where C = ( C1 C2 ) - - W := C * V' = (C1*V1' + C2*V2') (stored in WORK) - - W := C2 + Shift the vectors which define the elementary reflectors one + column to the right, and set the first ilo and the last n-ihi + rows and columns to those of the unit matrix */ - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); -/* L220: */ - } - -/* W := W * V2' */ - - dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b15, & - v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] - , ldwork); - if (*n > *k) { - -/* W := W + C1 * V1' */ - - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b15, & - c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & - work[work_offset], ldwork); - } - -/* W := W * T or W * T' */ - - dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); - -/* C := C - W * V */ - - if (*n > *k) { - -/* C1 := 
C1 - W * V1 */ - - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, &i__1, k, & - c_b151, &work[work_offset], ldwork, &v[v_offset], - ldv, &c_b15, &c__[c_offset], ldc); - } - -/* W := W * V2 */ - - dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b15, - &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - -/* C1 := C1 - W */ + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + i__4 = i__ + (j - 1) * a_dim1; + a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i; +/* L20: */ + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L30: */ + } +/* L40: */ + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L50: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; +/* L60: */ + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L70: */ + } + i__2 = j + j * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; +/* L80: */ + } - i__1 = *k; - for (j = 1; j <= i__1; ++j) { - i__2 = *m; - for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * - work_dim1]; -/* L230: */ - } -/* L240: */ - } + if (nh > 0) { - } +/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ - } + cungqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* + ilo], &work[1], lwork, &iinfo); } - + work[1].r = (real) lwkopt, work[1].i = 0.f; return 0; -/* End of DLARFB */ +/* End of CUNGHR */ -} /* dlarfb_ */ +} /* cunghr_ */ -/* Subroutine */ int dlarfg_(integer *n, doublereal *alpha, doublereal *x, - integer *incx, 
doublereal *tau) +/* Subroutine */ int cungl2_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *info) { /* System generated locals */ - integer i__1; - doublereal d__1; + integer a_dim1, a_offset, i__1, i__2, i__3; + complex q__1, q__2; /* Builtin functions */ - double d_sign(doublereal *, doublereal *); + void r_cnjg(complex *, complex *); /* Local variables */ - static integer j, knt; - static doublereal beta; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - static doublereal xnorm; - - static doublereal safmin, rsafmn; + static integer i__, j, l; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *), clarf_(char *, integer *, integer *, complex *, + integer *, complex *, complex *, integer *, complex *), + clacgv_(integer *, complex *, integer *), xerbla_(char *, integer + *); /* - -- LAPACK auxiliary routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + June 30, 1999 Purpose ======= - DLARFG generates a real elementary reflector H of order n, such - that - - H * ( alpha ) = ( beta ), H' * H = I. - ( x ) ( 0 ) - - where alpha and beta are scalars, and x is an (n-1)-element real - vector. H is represented in the form - - H = I - tau * ( 1 ) * ( 1 v' ) , - ( v ) - - where tau is a real scalar and v is a real (n-1)-element - vector. + CUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, + which is defined as the first m rows of a product of k elementary + reflectors of order n - If the elements of x are all zero, then tau = 0 and H is taken to be - the unit matrix. + Q = H(k)' . . . H(2)' H(1)' - Otherwise 1 <= tau <= 2. + as returned by CGELQF. 
Arguments ========= + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + N (input) INTEGER - The order of the elementary reflector. + The number of columns of the matrix Q. N >= M. - ALPHA (input/output) DOUBLE PRECISION - On entry, the value alpha. - On exit, it is overwritten with the value beta. + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. - X (input/output) DOUBLE PRECISION array, dimension - (1+(N-2)*abs(INCX)) - On entry, the vector x. - On exit, it is overwritten with the vector v. + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by CGELQF in the first k rows of its array argument A. + On exit, the m by n matrix Q. - INCX (input) INTEGER - The increment between elements of X. INCX > 0. + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). - TAU (output) DOUBLE PRECISION - The value tau. + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGELQF. + + WORK (workspace) COMPLEX array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value ===================================================================== -*/ + Test the input arguments +*/ + /* Parameter adjustments */ - --x; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; /* Function Body */ - if (*n <= 1) { - *tau = 0.; + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGL2", &i__1); return 0; } - i__1 = *n - 1; - xnorm = dnrm2_(&i__1, &x[1], incx); +/* Quick return if possible */ - if (xnorm == 0.) 
{ + if (*m <= 0) { + return 0; + } -/* H = I */ + if (*k < *m) { - *tau = 0.; - } else { +/* Initialise rows k+1:m to rows of the unit matrix */ -/* general case */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = *k + 1; l <= i__2; ++l) { + i__3 = l + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } + if (j > *k && j <= *m) { + i__2 = j + j * a_dim1; + a[i__2].r = 1.f, a[i__2].i = 0.f; + } +/* L20: */ + } + } - d__1 = dlapy2_(alpha, &xnorm); - beta = -d_sign(&d__1, alpha); - safmin = SAFEMINIMUM / EPSILON; - if (abs(beta) < safmin) { + for (i__ = *k; i__ >= 1; --i__) { -/* XNORM, BETA may be inaccurate; scale X and recompute them */ +/* Apply H(i)' to A(i:m,i:n) from the right */ - rsafmn = 1. / safmin; - knt = 0; -L10: - ++knt; - i__1 = *n - 1; - dscal_(&i__1, &rsafmn, &x[1], incx); - beta *= rsafmn; - *alpha *= rsafmn; - if (abs(beta) < safmin) { - goto L10; + if (i__ < *n) { + i__1 = *n - i__; + clacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); + if (i__ < *m) { + i__1 = i__ + i__ * a_dim1; + a[i__1].r = 1.f, a[i__1].i = 0.f; + i__1 = *m - i__; + i__2 = *n - i__ + 1; + r_cnjg(&q__1, &tau[i__]); + clarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + q__1, &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); } + i__1 = *n - i__; + i__2 = i__; + q__1.r = -tau[i__2].r, q__1.i = -tau[i__2].i; + cscal_(&i__1, &q__1, &a[i__ + (i__ + 1) * a_dim1], lda); + i__1 = *n - i__; + clacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda); + } + i__1 = i__ + i__ * a_dim1; + r_cnjg(&q__2, &tau[i__]); + q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; + a[i__1].r = q__1.r, a[i__1].i = q__1.i; -/* New BETA is at most 1, at least SAFMIN */ - - i__1 = *n - 1; - xnorm = dnrm2_(&i__1, &x[1], incx); - d__1 = dlapy2_(alpha, &xnorm); - beta = -d_sign(&d__1, alpha); - *tau = (beta - *alpha) / beta; - i__1 = *n - 1; - d__1 = 1. 
/ (*alpha - beta); - dscal_(&i__1, &d__1, &x[1], incx); - -/* If ALPHA is subnormal, it may lose relative accuracy */ +/* Set A(i,1:i-1,i) to zero */ - *alpha = beta; - i__1 = knt; - for (j = 1; j <= i__1; ++j) { - *alpha *= safmin; -/* L20: */ - } - } else { - *tau = (beta - *alpha) / beta; - i__1 = *n - 1; - d__1 = 1. / (*alpha - beta); - dscal_(&i__1, &d__1, &x[1], incx); - *alpha = beta; + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + i__2 = i__ + l * a_dim1; + a[i__2].r = 0.f, a[i__2].i = 0.f; +/* L30: */ } +/* L40: */ } - return 0; -/* End of DLARFG */ +/* End of CUNGL2 */ -} /* dlarfg_ */ +} /* cungl2_ */ -/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer * - k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, - integer *ldt) +/* Subroutine */ int cunglq_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info) { /* System generated locals */ - integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; - doublereal d__1; + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ - static integer i__, j; - static doublereal vii; - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), dtrmv_(char *, - char *, char *, integer *, doublereal *, integer *, doublereal *, + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int cungl2_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *), clarfb_( + char *, char *, char *, char *, integer *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *, + complex *, integer *), clarft_( + char *, char *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *), xerbla_(char *, integer *); + extern integer 
ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; /* - -- LAPACK auxiliary routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + June 30, 1999 Purpose ======= - DLARFT forms the triangular factor T of a real block reflector H - of order n, which is defined as a product of k elementary reflectors. - - If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; - - If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. - - If STOREV = 'C', the vector which defines the elementary reflector - H(i) is stored in the i-th column of the array V, and - - H = I - V * T * V' + CUNGLQ generates an M-by-N complex matrix Q with orthonormal rows, + which is defined as the first M rows of a product of K elementary + reflectors of order N - If STOREV = 'R', the vector which defines the elementary reflector - H(i) is stored in the i-th row of the array V, and + Q = H(k)' . . . H(2)' H(1)' - H = I - V' * T * V + as returned by CGELQF. Arguments ========= - DIRECT (input) CHARACTER*1 - Specifies the order in which the elementary reflectors are - multiplied to form the block reflector: - = 'F': H = H(1) H(2) . . . H(k) (Forward) - = 'B': H = H(k) . . . H(2) H(1) (Backward) - - STOREV (input) CHARACTER*1 - Specifies how the vectors which define the elementary - reflectors are stored (see also Further Details): - = 'C': columnwise - = 'R': rowwise + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. N (input) INTEGER - The order of the block reflector H. N >= 0. + The number of columns of the matrix Q. N >= M. K (input) INTEGER - The order of the triangular factor T (= the number of - elementary reflectors). K >= 1. + The number of elementary reflectors whose product defines the + matrix Q. 
M >= K >= 0. - V (input/output) DOUBLE PRECISION array, dimension - (LDV,K) if STOREV = 'C' - (LDV,N) if STOREV = 'R' - The matrix V. See further details. + A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by CGELQF in the first k rows of its array argument A. + On exit, the M-by-N matrix Q. - LDV (input) INTEGER - The leading dimension of the array V. - If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). - TAU (input) DOUBLE PRECISION array, dimension (K) + TAU (input) COMPLEX array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i). - - T (output) DOUBLE PRECISION array, dimension (LDT,K) - The k by k triangular factor T of the block reflector. - If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is - lower triangular. The rest of the array is not used. - - LDT (input) INTEGER - The leading dimension of the array T. LDT >= K. - - Further Details - =============== + reflector H(i), as returned by CGELQF. - The shape of the matrix V and the storage of the vectors which define - the H(i) is best illustrated by the following example with n = 5 and - k = 3. The elements equal to 1 are not stored; the corresponding - array elements are modified but restored on exit. The rest of the - array is not used. - - DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) - ( v1 1 ) ( 1 v2 v2 v2 ) - ( v1 v2 1 ) ( 1 v3 v3 ) - ( v1 v2 v3 ) - ( v1 v2 v3 ) + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is + the optimal blocksize. 
- DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. - V = ( v1 v2 v3 ) V = ( v1 v1 1 ) - ( v1 v2 v3 ) ( v2 v2 v2 1 ) - ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) - ( 1 v3 ) - ( 1 ) + INFO (output) INTEGER + = 0: successful exit; + < 0: if INFO = -i, the i-th argument has an illegal value ===================================================================== - Quick return if possible + Test the input arguments */ /* Parameter adjustments */ - v_dim1 = *ldv; - v_offset = 1 + v_dim1; - v -= v_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; --tau; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; + --work; /* Function Body */ - if (*n == 0) { + *info = 0; + nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*m) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGLQ", &i__1); + return 0; + } else if (lquery) { return 0; } - if (lsame_(direct, "F")) { - i__1 = *k; - for (i__ = 1; i__ <= i__1; ++i__) { - if (tau[i__] == 0.) { +/* Quick return if possible */ -/* H(i) = I */ + if (*m <= 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } - i__2 = i__; - for (j = 1; j <= i__2; ++j) { - t[j + i__ * t_dim1] = 0.; -/* L10: */ - } - } else { + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < *k) { -/* general case */ +/* + Determine when to cross over from blocked to unblocked code. 
- vii = v[i__ + i__ * v_dim1]; - v[i__ + i__ * v_dim1] = 1.; - if (lsame_(storev, "C")) { + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "CUNGLQ", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { -/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ +/* Determine if workspace is large enough for blocked code. */ - i__2 = *n - i__ + 1; - i__3 = i__ - 1; - d__1 = -tau[i__]; - dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1], - ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b29, &t[ - i__ * t_dim1 + 1], &c__1); - } else { + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ - i__2 = i__ - 1; - i__3 = *n - i__ + 1; - d__1 = -tau[i__]; - dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ * - v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & - c_b29, &t[i__ * t_dim1 + 1], &c__1); - } - v[i__ + i__ * v_dim1] = vii; + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNGLQ", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } -/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + if (nb >= nbmin && nb < *k && nx < *k) { - i__2 = i__ - 1; - dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ - t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); - t[i__ + i__ * t_dim1] = tau[i__]; +/* + Use blocked code after the last block. + The first kk rows are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(kk+1:m,1:kk) to zero. */ + + i__1 = kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = kk + 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ } /* L20: */ } } else { - for (i__ = *k; i__ >= 1; --i__) { - if (tau[i__] == 0.) 
{ + kk = 0; + } -/* H(i) = I */ +/* Use unblocked code for the last or only block. */ - i__1 = *k; - for (j = i__; j <= i__1; ++j) { - t[j + i__ * t_dim1] = 0.; -/* L30: */ - } - } else { + if (kk < *m) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + cungl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } -/* general case */ + if (kk > 0) { - if (i__ < *k) { - if (lsame_(storev, "C")) { - vii = v[*n - *k + i__ + i__ * v_dim1]; - v[*n - *k + i__ + i__ * v_dim1] = 1.; +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *m) { /* - T(i+1:k,i) := - - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) */ - i__1 = *n - *k + i__; - i__2 = *k - i__; - d__1 = -tau[i__]; - dgemv_("Transpose", &i__1, &i__2, &d__1, &v[(i__ + 1) - * v_dim1 + 1], ldv, &v[i__ * v_dim1 + 1], & - c__1, &c_b29, &t[i__ + 1 + i__ * t_dim1], & - c__1); - v[*n - *k + i__ + i__ * v_dim1] = vii; - } else { - vii = v[i__ + (*n - *k + i__) * v_dim1]; - v[i__ + (*n - *k + i__) * v_dim1] = 1.; + i__2 = *n - i__ + 1; + clarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); -/* - T(i+1:k,i) := - - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' -*/ +/* Apply H' to A(i+ib:m,i:n) from the right */ - i__1 = *k - i__; - i__2 = *n - *k + i__; - d__1 = -tau[i__]; - dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + - 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & - c_b29, &t[i__ + 1 + i__ * t_dim1], &c__1); - v[i__ + (*n - *k + i__) * v_dim1] = vii; - } + i__2 = *m - i__ - ib + 1; + i__3 = *n - i__ + 1; + clarfb_("Right", "Conjugate transpose", "Forward", "Rowwise", + &i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ + ib + 1], 
&ldwork); + } -/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ +/* Apply H' to columns i:n of current block */ - i__1 = *k - i__; - dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ - + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * - t_dim1], &c__1) - ; + i__2 = *n - i__ + 1; + cungl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set columns 1:i-1 of current block to zero */ + + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + ib - 1; + for (l = i__; l <= i__3; ++l) { + i__4 = l + j * a_dim1; + a[i__4].r = 0.f, a[i__4].i = 0.f; +/* L30: */ } - t[i__ + i__ * t_dim1] = tau[i__]; - } /* L40: */ + } +/* L50: */ } } + + work[1].r = (real) iws, work[1].i = 0.f; return 0; -/* End of DLARFT */ +/* End of CUNGLQ */ -} /* dlarft_ */ +} /* cunglq_ */ -/* Subroutine */ int dlarfx_(char *side, integer *m, integer *n, doublereal * - v, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work) +/* Subroutine */ int cungqr_(integer *m, integer *n, integer *k, complex *a, + integer *lda, complex *tau, complex *work, integer *lwork, integer * + info) { /* System generated locals */ - integer c_dim1, c_offset, i__1; - doublereal d__1; + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ - static integer j; - static doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, - v6, v7, v8, v9, t10, v10, sum; - extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int cung2r_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *), clarfb_( + char *, char *, char *, char *, integer *, integer *, integer *, + complex *, integer *, complex *, integer *, complex *, integer *, + complex *, integer *), clarft_( + char *, char *, integer *, integer *, complex *, integer *, + complex *, 
complex *, integer *), xerbla_(char *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; /* - -- LAPACK auxiliary routine (version 3.0) -- + -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + June 30, 1999 Purpose ======= - DLARFX applies a real elementary reflector H to a real m by n - matrix C, from either the left or the right. H is represented in the - form - - H = I - tau * v * v' - - where tau is a real scalar and v is a real vector. + CUNGQR generates an M-by-N complex matrix Q with orthonormal columns, + which is defined as the first N columns of a product of K elementary + reflectors of order M - If tau = 0, then H is taken to be the unit matrix + Q = H(1) H(2) . . . H(k) - This version uses inline code if H has order < 11. + as returned by CGEQRF. Arguments ========= - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - M (input) INTEGER - The number of rows of the matrix C. + The number of rows of the matrix Q. M >= 0. N (input) INTEGER - The number of columns of the matrix C. + The number of columns of the matrix Q. M >= N >= 0. - V (input) DOUBLE PRECISION array, dimension (M) if SIDE = 'L' - or (N) if SIDE = 'R' - The vector v in the representation of H. + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. - TAU (input) DOUBLE PRECISION - The value tau in the representation of H. 
+ A (input/output) COMPLEX array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by CGEQRF in the first k columns of its array + argument A. + On exit, the M-by-N matrix Q. - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). - LDC (input) INTEGER - The leading dimension of the array C. LDA >= (1,M). + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQRF. - WORK (workspace) DOUBLE PRECISION array, dimension - (N) if SIDE = 'L' - or (M) if SIDE = 'R' - WORK is not referenced if H has order < 11. + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value ===================================================================== -*/ + Test the input arguments +*/ + /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; --work; /* Function Body */ - if (*tau == 0.) 
{ + *info = 0; + nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*n) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNGQR", &i__1); + return 0; + } else if (lquery) { return 0; } - if (lsame_(side, "L")) { -/* Form H * C, where H has order m. */ +/* Quick return if possible */ - switch (*m) { - case 1: goto L10; - case 2: goto L30; - case 3: goto L50; - case 4: goto L70; - case 5: goto L90; - case 6: goto L110; - case 7: goto L130; - case 8: goto L150; - case 9: goto L170; - case 10: goto L190; - } + if (*n <= 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { /* - Code for general M + Determine when to cross over from blocked to unblocked code. - w := C'*v + Computing MAX */ + i__1 = 0, i__2 = ilaenv_(&c__3, "CUNGQR", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { - dgemv_("Transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], &c__1, & - c_b29, &work[1], &c__1); +/* Determine if workspace is large enough for blocked code. */ -/* C := C - tau * v * w' */ + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { - d__1 = -(*tau); - dger_(m, n, &d__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc) - ; - goto L410; -L10: +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. 
+*/ -/* Special code for 1 x 1 Householder */ + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNGQR", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk columns are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(1:kk,kk+1:n) to zero. */ - t1 = 1. - *tau * v[1] * v[1]; i__1 = *n; - for (j = 1; j <= i__1; ++j) { - c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; + for (j = kk + 1; j <= i__1; ++j) { + i__2 = kk; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * a_dim1; + a[i__3].r = 0.f, a[i__3].i = 0.f; +/* L10: */ + } /* L20: */ } - goto L410; -L30: + } else { + kk = 0; + } -/* Special code for 2 x 2 Householder */ +/* Use unblocked code for the last or only block. */ - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; -/* L40: */ - } - goto L410; -L50: + if (kk < *n) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + cung2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } -/* Special code for 3 x 3 Householder */ + if (kk > 0) { - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; -/* L60: */ - } - goto L410; -L70: +/* Use blocked code */ -/* Special code for 4 x 4 Householder */ + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? 
i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *n) { - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; -/* L80: */ - } - goto L410; -L90: +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ -/* Special code for 5 x 5 Householder */ + i__2 = *m - i__ + 1; + clarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; -/* L100: */ - } - goto L410; -L110: +/* Apply H to A(i:m,i+ib:n) from the left */ -/* Special code for 6 x 6 Householder */ + i__2 = *m - i__ + 1; + i__3 = *n - i__ - ib + 1; + clarfb_("Left", "No transpose", "Forward", "Columnwise", & + i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & + work[ib + 1], &ldwork); + } - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - 
sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; -/* L120: */ - } - goto L410; -L130: +/* Apply H to rows i:m of current block */ -/* Special code for 7 x 7 Householder */ + i__2 = *m - i__ + 1; + cung2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); - v1 = v[1]; - t1 = *tau * v1; - v2 = v[2]; - t2 = *tau * v2; - v3 = v[3]; - t3 = *tau * v3; - v4 = v[4]; - t4 = *tau * v4; - v5 = v[5]; - t5 = *tau * v5; - v6 = v[6]; - t6 = *tau * v6; - v7 = v[7]; - t7 = *tau * v7; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * - c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ - j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * - c_dim1 + 7]; - c__[j * c_dim1 + 1] -= sum * t1; - c__[j * c_dim1 + 2] -= sum * t2; - c__[j * c_dim1 + 3] -= sum * t3; - c__[j * c_dim1 + 4] -= sum * t4; - c__[j * c_dim1 + 5] -= sum * t5; - c__[j * c_dim1 + 6] -= sum * t6; - c__[j * c_dim1 + 7] -= sum * t7; -/* L140: */ +/* Set rows 1:i-1 of current block to zero */ + + i__2 = i__ + ib - 1; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ - 1; + for (l = 1; l <= i__3; ++l) { + i__4 = l + j * a_dim1; + a[i__4].r = 0.f, a[i__4].i = 0.f; +/* L30: */ + } +/* L40: */ + } +/* L50: */ } - goto L410; -L150: + } -/* Special code for 8 x 8 Householder */ + work[1].r = (real) iws, work[1].i = 0.f; + return 0; - v1 = v[1]; - t1 = *tau * v1; +/* End of CUNGQR */ + +} /* cungqr_ */ + +/* Subroutine */ int cunm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info) +{ + /* 
System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + complex q__1; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, i1, i2, i3, mi, ni, nq; + static complex aii; + static logical left; + static complex taui; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CUNM2L overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by CGEQLF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGEQLF in the last k columns of its array argument A. 
+ A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQLF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) COMPLEX array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNM2L", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(1:m-k+i,1:n) */ + + mi = *m - *k + i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,1:n-k+i) */ + + ni = *n - *k + i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } else { + r_cnjg(&q__1, &tau[i__]); + taui.r = q__1.r, taui.i = q__1.i; + } + i__3 = nq - *k + i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = nq - *k + i__ + i__ * a_dim1; + a[i__3].r = 1.f, a[i__3].i = 0.f; + clarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &taui, &c__[ + c_offset], ldc, &work[1]); + i__3 = nq - *k + i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; +/* L10: */ + } + return 0; + +/* End of CUNM2L */ + +} /* cunm2l_ */ + +/* Subroutine */ int cunm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + complex q__1; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static complex aii; + static logical left; + static complex taui; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CUNM2R overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQRF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). 
+ + WORK (workspace) COMPLEX array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNM2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } else { + r_cnjg(&q__1, &tau[i__]); + taui.r = q__1.r, taui.i = q__1.i; + } + i__3 = i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = i__ + i__ * a_dim1; + a[i__3].r = 1.f, a[i__3].i = 0.f; + clarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &taui, &c__[ic + + jc * c_dim1], ldc, &work[1]); + i__3 = i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; +/* L10: */ + } + return 0; + +/* End of CUNM2R */ + +} /* cunm2r_ */ + +/* Subroutine */ int cunmbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, complex *a, integer *lda, complex *tau, + complex *c__, integer *ldc, complex *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cunmlq_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + static logical notran; + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + static 
logical applyq; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + If VECT = 'Q', CUNMBR overwrites the general complex M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + If VECT = 'P', CUNMBR overwrites the general complex M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': P * C C * P + TRANS = 'C': P**H * C C * P**H + + Here Q and P**H are the unitary matrices determined by CGEBRD when + reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q + and P**H are defined as products of elementary reflectors H(i) and + G(i) respectively. + + Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + order of the unitary matrix Q or P**H that is applied. + + If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + if nq >= k, Q = H(1) H(2) . . . H(k); + if nq < k, Q = H(1) H(2) . . . H(nq-1). + + If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + if k < nq, P = G(1) G(2) . . . G(k); + if k >= nq, P = G(1) G(2) . . . G(nq-1). + + Arguments + ========= + + VECT (input) CHARACTER*1 + = 'Q': apply Q or Q**H; + = 'P': apply P or P**H. + + SIDE (input) CHARACTER*1 + = 'L': apply Q, Q**H, P or P**H from the Left; + = 'R': apply Q, Q**H, P or P**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q or P; + = 'C': Conjugate transpose, apply Q**H or P**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original + matrix reduced by CGEBRD. + If VECT = 'P', the number of rows in the original + matrix reduced by CGEBRD. + K >= 0. 
+ + A (input) COMPLEX array, dimension + (LDA,min(nq,K)) if VECT = 'Q' + (LDA,nq) if VECT = 'P' + The vectors which define the elementary reflectors H(i) and + G(i), whose products determine the matrices Q and P, as + returned by CGEBRD. + + LDA (input) INTEGER + The leading dimension of the array A. + If VECT = 'Q', LDA >= max(1,nq); + if VECT = 'P', LDA >= max(1,min(nq,K)). + + TAU (input) COMPLEX array, dimension (min(nq,K)) + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i) which determines Q or P, as returned + by CGEBRD in the array argument TAUQ or TAUP. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q + or P*C or P**H*C or C*P or C*P**H. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + applyq = lsame_(vect, "Q"); + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! applyq && ! lsame_(vect, "P")) { + *info = -1; + } else if (! left && ! lsame_(side, "R")) { + *info = -2; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*k < 0) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = min(nq,*k); + if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! 
lquery) { + *info = -13; + } + } + + if (*info == 0) { + if (applyq) { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "CUNMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "CUNMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNMBR", &i__1); + return 0; + } else if (lquery) { + } + +/* Quick return if possible */ + + work[1].r = 1.f, work[1].i = 0.f; + if (*m == 0 || *n == 0) { + return 0; + } + + if (applyq) { + +/* Apply Q */ + + if (nq >= *k) { + +/* Q was determined by a call to CGEBRD with nq >= k */ + + cunmqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* Q was determined by a call to CGEBRD with nq < k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + cunmqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, 
&tau[1] + , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + } else { + +/* Apply P */ + + if (notran) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + if (nq > *k) { + +/* P was determined by a call to CGEBRD with nq > k */ + + cunmlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* P was determined by a call to CGEBRD with nq <= k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + cunmlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo); + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMBR */ + +} /* cunmbr_ */ + +/* Subroutine */ int cunml2_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + complex q__1; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static complex aii; + static logical left; + static complex taui; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), + xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + CUNML2 overwrites the general complex m-by-n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'C', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'C', + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k)' . . . H(2)' H(1)' + + as returned by CGELQF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q' (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGELQF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the m-by-n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). 
+ + WORK (workspace) COMPLEX array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNML2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) or H(i)' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) or H(i)' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) or H(i)' */ + + if (notran) { + r_cnjg(&q__1, &tau[i__]); + taui.r = q__1.r, taui.i = q__1.i; + } else { + i__3 = i__; + taui.r = tau[i__3].r, taui.i = tau[i__3].i; + } + if (i__ < nq) { + i__3 = nq - i__; + clacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); + } + i__3 = i__ + i__ * a_dim1; + aii.r = a[i__3].r, aii.i = a[i__3].i; + i__3 = i__ + i__ * a_dim1; + a[i__3].r = 1.f, a[i__3].i = 0.f; + clarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &taui, &c__[ic + + jc * c_dim1], ldc, &work[1]); + i__3 = i__ + i__ * a_dim1; + a[i__3].r = aii.r, a[i__3].i = aii.i; + if (i__ < nq) { + i__3 = nq - i__; + clacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda); + } +/* L10: */ + } + return 0; + +/* End of CUNML2 */ + +} /* cunml2_ */ + +/* Subroutine */ int cunmlq_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static complex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int cunml2_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *), clarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, 
complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CUNMLQ overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k)' . . . H(2)' H(1)' + + as returned by CGELQF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). 
+ + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGELQF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! 
lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNMLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + cunml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! 
notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + if (notran) { + *(unsigned char *)transt = 'C'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + clarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + clarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], + ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMLQ */ + +} /* cunmlq_ */ + +/* Subroutine */ int cunmql_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static complex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int cunm2l_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, 
complex *, integer *, + complex *, integer *), clarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CUNMQL overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by CGEQLF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. 
+ If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQLF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! 
lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. + + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNMQL", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + cunm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i+ib-1) . . . H(i+1) H(i) +*/ + + i__4 = nq - *k + i__ + ib - 1; + clarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + , lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H' */ + + clarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ + i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & + work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMQL */ + +} /* cunmql_ */ + +/* Subroutine */ int cunmqr_(char *side, char *trans, integer *m, integer *n, + integer *k, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static complex t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int cunm2r_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *), clarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, complex *, + integer *), clarft_(char *, char * + , integer *, integer *, complex *, integer *, complex *, complex * + , 
integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CUNMQR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) COMPLEX array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + CGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) COMPLEX array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEQRF. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. 
+ On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. 
+ + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("CUNMQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + cunm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . 
H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + clarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], t, &c__65) + ; + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + clarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * + c_dim1], ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMQR */ + +} /* cunmqr_ */ + +/* Subroutine */ int cunmtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, complex *a, integer *lda, complex *tau, complex *c__, + integer *ldc, complex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cunmql_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *), cunmqr_(char *, + char *, integer *, integer *, integer *, complex *, integer *, + complex *, complex *, integer *, complex *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + CUNMTR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + nq-1 elementary reflectors, as returned by CHETRD: + + if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + + if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A contains elementary reflectors + from CHETRD; + = 'L': Lower triangle of A contains elementary reflectors + from CHETRD. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'C': Conjugate transpose, apply Q**H. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + A (input) COMPLEX array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by CHETRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) COMPLEX array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CHETRD. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
+ + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >=M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "C")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! 
lquery) { + *info = -12; + } + + if (*info == 0) { + if (upper) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "CUNMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "CUNMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("CUNMTR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nq == 1) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + if (left) { + mi = *m - 1; + ni = *n; + } else { + mi = *m; + ni = *n - 1; + } + + if (upper) { + +/* Q was determined by a call to CHETRD with UPLO = 'U' */ + + i__2 = nq - 1; + cunmql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); + } else { + +/* Q was determined by a call to CHETRD with UPLO = 'L' */ + + if (left) { + i1 = 2; + i2 = 1; + } else { + i1 = 1; + i2 = 2; + } + i__2 = nq - 1; + cunmqr_(side, trans, 
&mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMTR */ + +} /* cunmtr_ */ + +/* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal * + d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, + integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *), log(doublereal); + + /* Local variables */ + static integer i__, j, k; + static doublereal p, r__; + static integer z__, ic, ii, kk; + static doublereal cs; + static integer is, iu; + static doublereal sn; + static integer nm1; + static doublereal eps; + static integer ivt, difl, difr, ierr, perm, mlvl, sqre; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer * + , doublereal *, integer *), dswap_(integer *, doublereal *, + integer *, doublereal *, integer *); + static integer poles, iuplo, nsize, start; + extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *); + + extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), 
dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlaset_(char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *); + static integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + static integer icompq; + static doublereal orgnrm; + static integer givnum, givptr, qstart, smlsiz, wstart, smlszp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + December 1, 1999 + + + Purpose + ======= + + DBDSDC computes the singular value decomposition (SVD) of a real + N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, + using a divide and conquer method, where S is a diagonal matrix + with non-negative diagonal elements (the singular values of B), and + U and VT are orthogonal matrices of left and right singular vectors, + respectively. DBDSDC can be used to compute all singular values, + and optionally, singular vectors or singular vectors in compact form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See DLASD3 for details. + + The code currently call DLASDQ if singular values only are desired. 
+ However, it can be slightly modified to compute singular values + using the divide and conquer method. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': B is upper bidiagonal. + = 'L': B is lower bidiagonal. + + COMPQ (input) CHARACTER*1 + Specifies whether singular vectors are to be computed + as follows: + = 'N': Compute singular values only; + = 'P': Compute singular values and compute singular + vectors in compact form; + = 'I': Compute singular values and singular vectors. + + N (input) INTEGER + The order of the matrix B. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B. + + E (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the elements of E contain the offdiagonal + elements of the bidiagonal matrix whose SVD is desired. + On exit, E has been destroyed. + + U (output) DOUBLE PRECISION array, dimension (LDU,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, U contains the left singular vectors + of the bidiagonal matrix. + For other values of COMPQ, U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1. + If singular vectors are desired, then LDU >= max( 1, N ). + + VT (output) DOUBLE PRECISION array, dimension (LDVT,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, VT' contains the right singular + vectors of the bidiagonal matrix. + For other values of COMPQ, VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1. + If singular vectors are desired, then LDVT >= max( 1, N ). + + Q (output) DOUBLE PRECISION array, dimension (LDQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. 
+ In particular, Q contains all the DOUBLE PRECISION data in + LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, Q is not referenced. + + IQ (output) INTEGER array, dimension (LDIQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. + In particular, IQ contains all INTEGER data in + LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, IQ is not referenced. + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) + If COMPQ = 'N' then LWORK >= (4 * N). + If COMPQ = 'P' then LWORK >= (6 * N). + If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). + + IWORK (workspace) INTEGER array, dimension (8*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an singular value. + The update process of divide and conquer failed. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --q; + --iq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + iuplo = 0; + if (lsame_(uplo, "U")) { + iuplo = 1; + } + if (lsame_(uplo, "L")) { + iuplo = 2; + } + if (lsame_(compq, "N")) { + icompq = 0; + } else if (lsame_(compq, "P")) { + icompq = 1; + } else if (lsame_(compq, "I")) { + icompq = 2; + } else { + icompq = -1; + } + if (iuplo == 0) { + *info = -1; + } else if (icompq < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { + *info = -7; + } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DBDSDC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n == 1) { + if (icompq == 1) { + q[1] = d_sign(&c_b2453, &d__[1]); + q[smlsiz * *n + 1] = 1.; + } else if (icompq == 2) { + u[u_dim1 + 1] = d_sign(&c_b2453, &d__[1]); + vt[vt_dim1 + 1] = 1.; + } + d__[1] = abs(d__[1]); + return 0; + } + nm1 = *n - 1; + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left +*/ + + wstart = 1; + qstart = 3; + if (icompq == 1) { + dcopy_(n, &d__[1], &c__1, &q[1], &c__1); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); + } + if (iuplo == 2) { + qstart = 5; + wstart = (*n << 1) - 1; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (icompq == 1) { + q[i__ + (*n << 1)] = cs; + q[i__ + *n * 3] = sn; + } else if (icompq == 2) { + work[i__] = cs; + work[nm1 + i__] = -sn; + } +/* L10: */ + } + } + +/* If ICOMPQ = 0, use 
DLASDQ to compute the singular values. */ + + if (icompq == 0) { + dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + goto L40; + } + +/* + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. +*/ + + if (*n <= smlsiz) { + if (icompq == 2) { + dlaset_("A", n, n, &c_b2467, &c_b2453, &u[u_offset], ldu); + dlaset_("A", n, n, &c_b2467, &c_b2453, &vt[vt_offset], ldvt); + dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + } else if (icompq == 1) { + iu = 1; + ivt = iu + *n; + dlaset_("A", n, n, &c_b2467, &c_b2453, &q[iu + (qstart - 1) * *n], + n); + dlaset_("A", n, n, &c_b2467, &c_b2453, &q[ivt + (qstart - 1) * *n] + , n); + dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( + qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ + iu + (qstart - 1) * *n], n, &work[wstart], info); + } + goto L40; + } + + if (icompq == 2) { + dlaset_("A", n, n, &c_b2467, &c_b2453, &u[u_offset], ldu); + dlaset_("A", n, n, &c_b2467, &c_b2453, &vt[vt_offset], ldvt); + } + +/* Scale. */ + + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) 
{ + return 0; + } + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, n, &c__1, &d__[1], n, &ierr); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, &nm1, &c__1, &e[1], &nm1, & + ierr); + + eps = EPSILON; + + mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) / + log(2.)) + 1; + smlszp = smlsiz + 1; + + if (icompq == 1) { + iu = 1; + ivt = smlsiz + 1; + difl = ivt + smlszp; + difr = difl + mlvl; + z__ = difr + (mlvl << 1); + ic = z__ + mlvl; + is = ic + 1; + poles = is + 1; + givnum = poles + (mlvl << 1); + + k = 1; + givptr = 2; + perm = 3; + givcol = perm + mlvl; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L20: */ + } + + start = 1; + sqre = 0; + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - start + 1; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - start + 1; + } else { + +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N). Solve this 1-by-1 problem + first. 
+*/ + + nsize = i__ - start + 1; + if (icompq == 2) { + u[*n + *n * u_dim1] = d_sign(&c_b2453, &d__[*n]); + vt[*n + *n * vt_dim1] = 1.; + } else if (icompq == 1) { + q[*n + (qstart - 1) * *n] = d_sign(&c_b2453, &d__[*n]); + q[*n + (smlsiz + qstart - 1) * *n] = 1.; + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + } + if (icompq == 2) { + dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + + start * u_dim1], ldu, &vt[start + start * vt_dim1], + ldvt, &smlsiz, &iwork[1], &work[wstart], info); + } else { + dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ + start], &q[start + (iu + qstart - 2) * *n], n, &q[ + start + (ivt + qstart - 2) * *n], &iq[start + k * *n], + &q[start + (difl + qstart - 2) * *n], &q[start + ( + difr + qstart - 2) * *n], &q[start + (z__ + qstart - + 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ + start + givptr * *n], &iq[start + givcol * *n], n, & + iq[start + perm * *n], &q[start + (givnum + qstart - + 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ + start + (is + qstart - 2) * *n], &work[wstart], & + iwork[1], info); + if (*info != 0) { + return 0; + } + } + start = i__ + 1; + } +/* L30: */ + } + +/* Unscale */ + + dlascl_("G", &c__0, &c__0, &c_b2453, &orgnrm, n, &c__1, &d__[1], n, &ierr); +L40: + +/* Use Selection Sort to minimize swaps of singular vectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + kk = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] > p) { + kk = j; + p = d__[j]; + } +/* L50: */ + } + if (kk != i__) { + d__[kk] = d__[i__]; + d__[i__] = p; + if (icompq == 1) { + iq[i__] = kk; + } else if (icompq == 2) { + dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & + c__1); + dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); + } + } else if (icompq == 1) { + iq[i__] = i__; + } +/* L60: */ + } + +/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ + + if (icompq == 1) { + if (iuplo == 1) { + iq[*n] = 1; + } else { + iq[*n] = 
0; + } + } + +/* + If B is lower bidiagonal, update U by those Givens rotations + which rotated B to be upper bidiagonal +*/ + + if (iuplo == 2 && icompq == 2) { + dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); + } + + return 0; + +/* End of DBDSDC */ + +} /* dbdsdc_ */ + +/* Subroutine */ int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt, + integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer * + ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign( + doublereal *, doublereal *); + + /* Local variables */ + static doublereal f, g, h__; + static integer i__, j, m; + static doublereal r__, cs; + static integer ll; + static doublereal sn, mu; + static integer nm1, nm12, nm13, lll; + static doublereal eps, sll, tol, abse; + static integer idir; + static doublereal abss; + static integer oldm; + static doublereal cosl; + static integer isub, iter; + static doublereal unfl, sinl, cosr, smin, smax, sinr; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *), dlas2_( + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + static doublereal oldcs; + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *); + static integer oldll; + static doublereal shift, sigmn, oldsn; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer maxit; + static doublereal sminl, sigmx; + static logical lower; + extern 
/* Subroutine */ int dlasq1_(integer *, doublereal *, doublereal *, + doublereal *, integer *), dlasv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), xerbla_(char *, + integer *); + static doublereal sminoa, thresh; + static logical rotate; + static doublereal sminlo, tolmul; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DBDSQR computes the singular value decomposition (SVD) of a real + N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' + denotes the transpose of P), where S is a diagonal matrix with + non-negative diagonal elements (the singular values of B), and Q + and P are orthogonal matrices. + + The routine computes S, and optionally computes U * Q, P' * VT, + or Q' * C, for given real input matrices U, VT, and C. + + See "Computing Small Singular Values of Bidiagonal Matrices With + Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + no. 5, pp. 873-912, Sept 1990) and + "Accurate singular values and differential qd algorithms," by + B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics + Department, University of California at Berkeley, July 1992 + for a detailed description of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': B is upper bidiagonal; + = 'L': B is lower bidiagonal. + + N (input) INTEGER + The order of the matrix B. N >= 0. + + NCVT (input) INTEGER + The number of columns of the matrix VT. NCVT >= 0. + + NRU (input) INTEGER + The number of rows of the matrix U. NRU >= 0. + + NCC (input) INTEGER + The number of columns of the matrix C. NCC >= 0. 
+ + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B in decreasing + order. + + E (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the elements of E contain the + offdiagonal elements of the bidiagonal matrix whose SVD + is desired. On normal exit (INFO = 0), E is destroyed. + If the algorithm does not converge (INFO > 0), D and E + will contain the diagonal and superdiagonal elements of a + bidiagonal matrix orthogonally equivalent to the one given + as input. E(N) is used for workspace. + + VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) + On entry, an N-by-NCVT matrix VT. + On exit, VT is overwritten by P' * VT. + VT is not referenced if NCVT = 0. + + LDVT (input) INTEGER + The leading dimension of the array VT. + LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. + + U (input/output) DOUBLE PRECISION array, dimension (LDU, N) + On entry, an NRU-by-N matrix U. + On exit, U is overwritten by U * Q. + U is not referenced if NRU = 0. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= max(1,NRU). + + C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) + On entry, an N-by-NCC matrix C. + On exit, C is overwritten by Q' * C. + C is not referenced if NCC = 0. + + LDC (input) INTEGER + The leading dimension of the array C. + LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. + + WORK (workspace) DOUBLE PRECISION array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: If INFO = -i, the i-th argument had an illegal value + > 0: the algorithm did not converge; D and E contain the + elements of a bidiagonal matrix which is orthogonally + similar to the input matrix B; if INFO = i, i + elements of E have not converged to zero. 
+ + Internal Parameters + =================== + + TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) + TOLMUL controls the convergence criterion of the QR loop. + If it is positive, TOLMUL*EPS is the desired relative + precision in the computed singular values. + If it is negative, abs(TOLMUL*EPS*sigma_max) is the + desired absolute accuracy in the computed singular + values (corresponds to relative accuracy + abs(TOLMUL*EPS) in the largest singular value. + abs(TOLMUL) should be between 1 and 1/EPS, and preferably + between 10 (for fast convergence) and .1/EPS + (for there to be some accuracy in the results). + Default is to lose at either one eighth or 2 of the + available decimal digits in each computed singular value + (whichever is smaller). + + MAXITR INTEGER, default = 6 + MAXITR controls the maximum number of passes of the + algorithm through its inner loop. The algorithms stops + (and so fails to converge) if the number of passes + through the inner loop exceeds MAXITR*N**2. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + lower = lsame_(uplo, "L"); + if (! lsame_(uplo, "U") && ! 
lower) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ncvt < 0) { + *info = -3; + } else if (*nru < 0) { + *info = -4; + } else if (*ncc < 0) { + *info = -5; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -9; + } else if (*ldu < max(1,*nru)) { + *info = -11; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DBDSQR", &i__1); + return 0; + } + if (*n == 0) { + return 0; + } + if (*n == 1) { + goto L160; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + +/* If no singular vectors desired, use qd algorithm */ + + if (! rotate) { + dlasq1_(n, &d__[1], &e[1], &work[1], info); + return 0; + } + + nm1 = *n - 1; + nm12 = nm1 + nm1; + nm13 = nm12 + nm1; + idir = 0; + +/* Get machine constants */ + + eps = EPSILON; + unfl = SAFEMINIMUM; + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left +*/ + + if (lower) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + work[i__] = cs; + work[nm1 + i__] = sn; +/* L10: */ + } + +/* Update singular vectors if desired */ + + if (*nru > 0) { + dlasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], + ldu); + } + if (*ncc > 0) { + dlasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], + ldc); + } + } + +/* + Compute singular values to relative accuracy TOL + (By setting TOL to be negative, algorithm will compute + singular values to absolute accuracy ABS(TOL)*norm(input matrix)) + + Computing MAX + Computing MIN +*/ + d__3 = 100., d__4 = pow_dd(&eps, &c_b2532); + d__1 = 10., d__2 = min(d__3,d__4); + tolmul = max(d__1,d__2); + tol = tolmul * eps; + +/* Compute approximate maximum, minimum singular values */ + + 
smax = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1)); + smax = max(d__2,d__3); +/* L20: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1)); + smax = max(d__2,d__3); +/* L30: */ + } + sminl = 0.; + if (tol >= 0.) { + +/* Relative accuracy desired */ + + sminoa = abs(d__[1]); + if (sminoa == 0.) { + goto L50; + } + mu = sminoa; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1] + , abs(d__1)))); + sminoa = min(sminoa,mu); + if (sminoa == 0.) { + goto L50; + } +/* L40: */ + } +L50: + sminoa /= sqrt((doublereal) (*n)); +/* Computing MAX */ + d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl; + thresh = max(d__1,d__2); + } else { + +/* + Absolute accuracy desired + + Computing MAX +*/ + d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl; + thresh = max(d__1,d__2); + } + +/* + Prepare for main iteration loop for the singular values + (MAXIT is the maximum number of passes through the inner + loop permitted before nonconvergence signalled.) +*/ + + maxit = *n * 6 * *n; + iter = 0; + oldll = -1; + oldm = -1; + +/* M points to last element of unconverged part of matrix */ + + m = *n; + +/* Begin main iteration loop */ + +L60: + +/* Check for convergence or exceeding iteration count */ + + if (m <= 1) { + goto L160; + } + if (iter > maxit) { + goto L200; + } + +/* Find diagonal block of matrix to work on */ + + if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) { + d__[m] = 0.; + } + smax = (d__1 = d__[m], abs(d__1)); + smin = smax; + i__1 = m - 1; + for (lll = 1; lll <= i__1; ++lll) { + ll = m - lll; + abss = (d__1 = d__[ll], abs(d__1)); + abse = (d__1 = e[ll], abs(d__1)); + if (tol < 0. 
&& abss <= thresh) { + d__[ll] = 0.; + } + if (abse <= thresh) { + goto L80; + } + smin = min(smin,abss); +/* Computing MAX */ + d__1 = max(smax,abss); + smax = max(d__1,abse); +/* L70: */ + } + ll = 0; + goto L90; +L80: + e[ll] = 0.; + +/* Matrix splits since E(LL) = 0 */ + + if (ll == m - 1) { + +/* Convergence of bottom singular value, return to top of loop */ + + --m; + goto L60; + } +L90: + ++ll; + +/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ + + if (ll == m - 1) { + +/* 2 by 2 block, handle separately */ + + dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, + &sinl, &cosl); + d__[m - 1] = sigmx; + e[m - 1] = 0.; + d__[m] = sigmn; + +/* Compute singular vectors, if desired */ + + if (*ncvt > 0) { + drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & + cosr, &sinr); + } + if (*nru > 0) { + drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & + c__1, &cosl, &sinl); + } + if (*ncc > 0) { + drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & + cosl, &sinl); + } + m += -2; + goto L60; + } + +/* + If working on new submatrix, choose shift direction + (from larger end diagonal element towards smaller) +*/ + + if (ll > oldm || m < oldll) { + if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) { + +/* Chase bulge from top (big end) to bottom (small end) */ + + idir = 1; + } else { + +/* Chase bulge from bottom (big end) to top (small end) */ + + idir = 2; + } + } + +/* Apply convergence tests */ + + if (idir == 1) { + +/* + Run convergence test in forward direction + First apply standard test to bottom of matrix +*/ + + if ((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs( + d__1)) || tol < 0. && (d__3 = e[m - 1], abs(d__3)) <= thresh) + { + e[m - 1] = 0.; + goto L60; + } + + if (tol >= 0.) 
{ + +/* + If relative accuracy desired, + apply convergence criterion forward +*/ + + mu = (d__1 = d__[ll], abs(d__1)); + sminl = mu; + i__1 = m - 1; + for (lll = ll; lll <= i__1; ++lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; + } + sminlo = sminl; + mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ + lll], abs(d__1)))); + sminl = min(sminl,mu); +/* L100: */ + } + } + + } else { + +/* + Run convergence test in backward direction + First apply standard test to top of matrix +*/ + + if ((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(d__1) + ) || tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh) { + e[ll] = 0.; + goto L60; + } + + if (tol >= 0.) { + +/* + If relative accuracy desired, + apply convergence criterion backward +*/ + + mu = (d__1 = d__[m], abs(d__1)); + sminl = mu; + i__1 = ll; + for (lll = m - 1; lll >= i__1; --lll) { + if ((d__1 = e[lll], abs(d__1)) <= tol * mu) { + e[lll] = 0.; + goto L60; + } + sminlo = sminl; + mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] + , abs(d__1)))); + sminl = min(sminl,mu); +/* L110: */ + } + } + } + oldll = ll; + oldm = m; + +/* + Compute shift. First, test if shifting would ruin relative + accuracy, and if so set the shift to zero. + + Computing MAX +*/ + d__1 = eps, d__2 = tol * .01; + if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1,d__2)) { + +/* Use a zero shift to avoid loss of relative accuracy */ + + shift = 0.; + } else { + +/* Compute the shift from 2-by-2 block at end of matrix */ + + if (idir == 1) { + sll = (d__1 = d__[ll], abs(d__1)); + dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); + } else { + sll = (d__1 = d__[m], abs(d__1)); + dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); + } + +/* Test if shift negligible, and if so set to zero */ + + if (sll > 0.) 
{ +/* Computing 2nd power */ + d__1 = shift / sll; + if (d__1 * d__1 < eps) { + shift = 0.; + } + } + } + +/* Increment iteration count */ + + iter = iter + m - ll; + +/* If SHIFT = 0, do simplified QR iteration */ + + if (shift == 0.) { + if (idir == 1) { + +/* + Chase bulge from top to bottom + Save cosines and sines for later singular vector updates +*/ + + cs = 1.; + oldcs = 1.; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + d__1 = d__[i__] * cs; + dlartg_(&d__1, &e[i__], &cs, &sn, &r__); + if (i__ > ll) { + e[i__ - 1] = oldsn * r__; + } + d__1 = oldcs * r__; + d__2 = d__[i__ + 1] * sn; + dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll + 1] = cs; + work[i__ - ll + 1 + nm1] = sn; + work[i__ - ll + 1 + nm12] = oldcs; + work[i__ - ll + 1 + nm13] = oldsn; +/* L120: */ + } + h__ = d__[m] * cs; + d__[m] = h__ * oldcs; + e[m - 1] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; + } + + } else { + +/* + Chase bulge from bottom to top + Save cosines and sines for later singular vector updates +*/ + + cs = 1.; + oldcs = 1.; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + d__1 = d__[i__] * cs; + dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__); + if (i__ < m) { + e[i__] = oldsn * r__; + } + d__1 = oldcs * r__; + d__2 = d__[i__ - 1] * sn; + dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll] = cs; + work[i__ - ll + nm1] = -sn; + work[i__ - ll + nm12] = oldcs; + work[i__ - ll + nm13] = -oldsn; +/* L130: */ + } + h__ = d__[ll] * 
cs; + d__[ll] = h__ * oldcs; + e[ll] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; + } + } + } else { + +/* Use nonzero shift */ + + if (idir == 1) { + +/* + Chase bulge from top to bottom + Save cosines and sines for later singular vector updates +*/ + + f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b2453, & + d__[ll]) + shift / d__[ll]); + g = e[ll]; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + dlartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ > ll) { + e[i__ - 1] = r__; + } + f = cosr * d__[i__] + sinr * e[i__]; + e[i__] = cosr * e[i__] - sinr * d__[i__]; + g = sinr * d__[i__ + 1]; + d__[i__ + 1] = cosr * d__[i__ + 1]; + dlartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__] + sinl * d__[i__ + 1]; + d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; + if (i__ < m - 1) { + g = sinl * e[i__ + 1]; + e[i__ + 1] = cosl * e[i__ + 1]; + } + work[i__ - ll + 1] = cosr; + work[i__ - ll + 1 + nm1] = sinr; + work[i__ - ll + 1 + nm12] = cosl; + work[i__ - ll + 1 + nm13] = sinl; +/* L140: */ + } + e[m - 1] = f; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + 
+/* Test convergence */ + + if ((d__1 = e[m - 1], abs(d__1)) <= thresh) { + e[m - 1] = 0.; + } + + } else { + +/* + Chase bulge from bottom to top + Save cosines and sines for later singular vector updates +*/ + + f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b2453, &d__[ + m]) + shift / d__[m]); + g = e[m - 1]; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + dlartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ < m) { + e[i__] = r__; + } + f = cosr * d__[i__] + sinr * e[i__ - 1]; + e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; + g = sinr * d__[i__ - 1]; + d__[i__ - 1] = cosr * d__[i__ - 1]; + dlartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; + d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; + if (i__ > ll + 1) { + g = sinl * e[i__ - 2]; + e[i__ - 2] = cosl * e[i__ - 2]; + } + work[i__ - ll] = cosr; + work[i__ - ll + nm1] = -sinr; + work[i__ - ll + nm12] = cosl; + work[i__ - ll + nm13] = -sinl; +/* L150: */ + } + e[ll] = f; + +/* Test convergence */ + + if ((d__1 = e[ll], abs(d__1)) <= thresh) { + e[ll] = 0.; + } + +/* Update singular vectors if desired */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + } + } + +/* QR iteration finished, go back and check convergence */ + + goto L60; + +/* All singular values converged, so make them positive */ + +L160: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] < 0.) 
{ + d__[i__] = -d__[i__]; + +/* Change sign of singular vectors, if desired */ + + if (*ncvt > 0) { + dscal_(ncvt, &c_b2589, &vt[i__ + vt_dim1], ldvt); + } + } +/* L170: */ + } + +/* + Sort the singular values into decreasing order (insertion sort on + singular values, but only one transposition per singular vector) +*/ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I) */ + + isub = 1; + smin = d__[1]; + i__2 = *n + 1 - i__; + for (j = 2; j <= i__2; ++j) { + if (d__[j] <= smin) { + isub = j; + smin = d__[j]; + } +/* L180: */ + } + if (isub != *n + 1 - i__) { + +/* Swap singular values and vectors */ + + d__[isub] = d__[*n + 1 - i__]; + d__[*n + 1 - i__] = smin; + if (*ncvt > 0) { + dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + + vt_dim1], ldvt); + } + if (*nru > 0) { + dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * + u_dim1 + 1], &c__1); + } + if (*ncc > 0) { + dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + + c_dim1], ldc); + } + } +/* L190: */ + } + goto L220; + +/* Maximum number of iterations exceeded, failure to converge */ + +L200: + *info = 0; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) 
{ + ++(*info); + } +/* L210: */ + } +L220: + return 0; + +/* End of DBDSQR */ + +} /* dbdsqr_ */ + +/* Subroutine */ int dgebak_(char *job, char *side, integer *n, integer *ilo, + integer *ihi, doublereal *scale, integer *m, doublereal *v, integer * + ldv, integer *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + static integer i__, k; + static doublereal s; + static integer ii; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static logical leftv; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical rightv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DGEBAK forms the right or left eigenvectors of a real general matrix + by backward transformation on the computed eigenvectors of the + balanced matrix output by DGEBAL. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the type of backward transformation required: + = 'N', do nothing, return immediately; + = 'P', do backward transformation for permutation only; + = 'S', do backward transformation for scaling only; + = 'B', do backward transformations for both permutation and + scaling. + JOB must be the same as the argument JOB supplied to DGEBAL. + + SIDE (input) CHARACTER*1 + = 'R': V contains right eigenvectors; + = 'L': V contains left eigenvectors. + + N (input) INTEGER + The number of rows of the matrix V. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + The integers ILO and IHI determined by DGEBAL. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. 
+ + SCALE (input) DOUBLE PRECISION array, dimension (N) + Details of the permutation and scaling factors, as returned + by DGEBAL. + + M (input) INTEGER + The number of columns of the matrix V. M >= 0. + + V (input/output) DOUBLE PRECISION array, dimension (LDV,M) + On entry, the matrix of right or left eigenvectors to be + transformed, as returned by DHSEIN or DTREVC. + On exit, V is overwritten by the transformed eigenvectors. + + LDV (input) INTEGER + The leading dimension of the array V. LDV >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Decode and Test the input parameters +*/ + + /* Parameter adjustments */ + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEBAK", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1. 
/ scale[i__]; + dscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + + } + +/* + Backward permutation + + For I = ILO-1 step -1 until 1, + IHI+1 step 1 until N do -- +*/ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L40; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + } + + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = (integer) scale[i__]; + if (k == i__) { + goto L50; + } + dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + ; + } + } + } + + return 0; + +/* End of DGEBAK */ + +} /* dgebak_ */ + +/* Subroutine */ int dgebal_(char *job, integer *n, doublereal *a, integer * + lda, integer *ilo, integer *ihi, doublereal *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal c__, f, g; + static integer i__, j, k, l, m; + static doublereal r__, s, ca, ra; + static integer ica, ira, iexc; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static doublereal sfmin1, sfmin2, sfmax1, sfmax2; + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical noconv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DGEBAL balances a general real matrix A. This involves, first, + permuting A by a similarity transformation to isolate eigenvalues + in the first 1 to ILO-1 and last IHI+1 to N elements on the + diagonal; and second, applying a diagonal similarity transformation + to rows and columns ILO to IHI to make the rows and columns as + close in norm as possible. Both steps are optional. + + Balancing may reduce the 1-norm of the matrix, and improve the + accuracy of the computed eigenvalues and/or eigenvectors. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the operations to be performed on A: + = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 + for i = 1,...,N; + = 'P': permute only; + = 'S': scale only; + = 'B': both permute and scale. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the input matrix A. + On exit, A is overwritten by the balanced matrix. + If JOB = 'N', A is not referenced. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + ILO (output) INTEGER + IHI (output) INTEGER + ILO and IHI are set to integers such that on exit + A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. + If JOB = 'N' or 'S', ILO = 1 and IHI = N. + + SCALE (output) DOUBLE PRECISION array, dimension (N) + Details of the permutations and scaling factors applied to + A. If P(j) is the index of the row and column interchanged + with row and column j and D(j) is the scaling factor + applied to row and column j, then + SCALE(j) = P(j) for j = 1,...,ILO-1 + = D(j) for j = ILO,...,IHI + = P(j) for j = IHI+1,...,N. + The order in which the interchanges are made is N to IHI+1, + then 1 to ILO-1. + + INFO (output) INTEGER + = 0: successful exit. 
+ < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The permutations consist of row and column interchanges which put + the matrix in the form + + ( T1 X Y ) + P A P = ( 0 B Z ) + ( 0 0 T2 ) + + where T1 and T2 are upper triangular matrices whose eigenvalues lie + along the diagonal. The column indices ILO and IHI mark the starting + and ending columns of the submatrix B. Balancing consists of applying + a diagonal similarity transformation inv(D) * B * D to make the + 1-norms of each row of B and its corresponding column nearly equal. + The output matrix is + + ( T1 X*D Y ) + ( 0 inv(D)*B*D inv(D)*Z ). + ( 0 0 T2 ) + + Information about the permutations P and the diagonal matrix D is + returned in the vector SCALE. + + This subroutine is based on the EISPACK routine BALANC. + + Modified by Tzu-Yi Chen, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEBAL", &i__1); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L10: */ + } + goto L210; + } + + if (lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. 
*/ + +L20: + scale[m] = (doublereal) j; + if (j == m) { + goto L30; + } + + dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. */ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + if (a[j + i__ * a_dim1] != 0.) { + goto L70; + } +L60: + ; + } + + m = l; + iexc = 1; + goto L20; +L70: + ; + } + + goto L90; + +/* Search for columns isolating an eigenvalue and push them left. */ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + if (a[i__ + j * a_dim1] != 0.) { + goto L110; + } +L100: + ; + } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.; +/* L130: */ + } + + if (lsame_(job, "P")) { + goto L210; + } + +/* + Balance the submatrix in rows K to L. + + Iterative loop for norm reduction +*/ + + sfmin1 = SAFEMINIMUM / PRECISION; + sfmax1 = 1. / sfmin1; + sfmin2 = sfmin1 * 8.; + sfmax2 = 1. / sfmin2; +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + c__ = 0.; + r__ = 0.; + + i__2 = l; + for (j = k; j <= i__2; ++j) { + if (j == i__) { + goto L150; + } + c__ += (d__1 = a[j + i__ * a_dim1], abs(d__1)); + r__ += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +L150: + ; + } + ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1)); + i__2 = *n - k + 1; + ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1)); + +/* Guard against zero C or R due to underflow. */ + + if (c__ == 0. || r__ == 0.) 
{ + goto L200; + } + g = r__ / 8.; + f = 1.; + s = c__ + r__; +L160: +/* Computing MAX */ + d__1 = max(f,c__); +/* Computing MIN */ + d__2 = min(r__,g); + if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { + goto L170; + } + f *= 8.; + c__ *= 8.; + ca *= 8.; + r__ /= 8.; + g /= 8.; + ra /= 8.; + goto L160; + +L170: + g = c__ / 8.; +L180: +/* Computing MIN */ + d__1 = min(f,c__), d__1 = min(d__1,g); + if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { + goto L190; + } + f /= 8.; + c__ /= 8.; + g /= 8.; + ca /= 8.; + r__ *= 8.; + ra *= 8.; + goto L180; + +/* Now balance. */ + +L190: + if (c__ + r__ >= s * .95) { + goto L200; + } + if (f < 1. && scale[i__] < 1.) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1. && scale[i__] > 1.) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1. / f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; + } + +L210: + *ilo = k; + *ihi = l; + + return 0; + +/* End of DGEBAL */ + +} /* dgebal_ */ + +/* Subroutine */ int dgebd2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DGEBD2 reduces a real general m by n matrix A to upper or lower + bidiagonal form B by an orthogonal transformation: Q' * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the orthogonal matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the orthogonal matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. 
+ + TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + WORK (workspace) DOUBLE PRECISION array, dimension (max(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); + u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). 
+ + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("DGEBD2", &i__1); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *n) { + +/* + Generate elementary reflector G(i) to annihilate + A(i,i+2:n) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* 
Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + dlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } else { + taup[i__] = 0.; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; +/* Computing MIN */ + i__4 = i__ + 1; + dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ + i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *m) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:m,i) +*/ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } else { + tauq[i__] = 0.; + } +/* L20: */ + } + } + return 0; + +/* End of DGEBD2 */ + +} /* dgebd2_ */ + +/* Subroutine */ int dgebrd_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal * + taup, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* 
Local variables */ + static integer i__, j, nb, nx; + static doublereal ws; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer nbmin, iinfo, minmn; + extern /* Subroutine */ int dgebd2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlabrd_(integer *, integer *, integer * + , doublereal *, integer *, doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *) + , xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwrkx, ldwrky, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DGEBRD reduces a general real M-by-N matrix A to upper or lower + bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N general matrix to be reduced. 
+ On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the orthogonal matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the orthogonal matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) DOUBLE PRECISION array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,M,N). + For optimum performance LWORK >= (M+N)*NB, where NB + is the optimal blocksize. 
+ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); + u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). 
+ + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = max(i__1,i__2); + lwkopt = (*m + *n) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("DGEBRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = min(*m,*n); + if (minmn == 0) { + work[1] = 1.; + return 0; + } + + ws = (doublereal) max(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* + Set the crossover point NX. + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (doublereal) ((*m + *n) * nb); + if ((doublereal) (*lwork) < ws) { + +/* + Not enough work space for the optimal NB, consider using + a smaller block size. +*/ + + nbmin = ilaenv_(&c__2, "DGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* + Reduce rows and columns i:i+nb-1 to bidiagonal form and return + the matrices X and Y which are needed to update the unreduced + part of the matrix +*/ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* + Update the trailing submatrix A(i+nb:m,i+nb:n), using an update + of the form A := A - V*Y' - X*U' +*/ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + dgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b2589, &a[ + i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & + ldwrky, &c_b2453, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + dgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b2589, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b2453, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + (j + 1) * a_dim1] = e[j]; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + 1 + j * a_dim1] = e[j]; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1] = ws; + return 0; + +/* End of DGEBRD */ + +} /* dgebrd_ */ + +/* Subroutine */ int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal * + a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl, + integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + 
integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, k; + static doublereal r__, cs, sn; + static integer ihi; + static doublereal scl; + static integer ilo; + static doublereal dum[1], eps; + static integer ibal; + static char side[1]; + static integer maxb; + static doublereal anrm; + static integer ierr, itau; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer iwrk, nout; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern doublereal dlapy2_(doublereal *, doublereal *); + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_( + char *, char *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *), + dgebal_(char *, integer *, doublereal *, integer *, integer *, + integer *, doublereal *, integer *); + static logical scalea; + + static doublereal cscale; + extern doublereal dlange_(char *, integer *, integer *, doublereal *, + integer *, doublereal *); + extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dlascl_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), xerbla_(char *, integer *); + static logical select[1]; + extern integer ilaenv_(integer *, char *, char *, 
integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal bignum; + extern /* Subroutine */ int dorghr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dhseqr_(char *, char *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), dtrevc_(char *, char *, logical *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *); + static integer minwrk, maxwrk; + static logical wantvl; + static doublereal smlnum; + static integer hswork; + static logical lquery, wantvr; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + December 8, 1999 + + + Purpose + ======= + + DGEEV computes for an N-by-N real nonsymmetric matrix A, the + eigenvalues and, optionally, the left and/or right eigenvectors. + + The right eigenvector v(j) of A satisfies + A * v(j) = lambda(j) * v(j) + where lambda(j) is its eigenvalue. + The left eigenvector u(j) of A satisfies + u(j)**H * A = lambda(j) * u(j)**H + where u(j)**H denotes the conjugate transpose of u(j). + + The computed eigenvectors are normalized to have Euclidean norm + equal to 1 and largest component real. + + Arguments + ========= + + JOBVL (input) CHARACTER*1 + = 'N': left eigenvectors of A are not computed; + = 'V': left eigenvectors of A are computed. + + JOBVR (input) CHARACTER*1 + = 'N': right eigenvectors of A are not computed; + = 'V': right eigenvectors of A are computed. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the N-by-N matrix A. + On exit, A has been overwritten. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). 
+ + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) + WR and WI contain the real and imaginary parts, + respectively, of the computed eigenvalues. Complex + conjugate pairs of eigenvalues appear consecutively + with the eigenvalue having the positive imaginary part + first. + + VL (output) DOUBLE PRECISION array, dimension (LDVL,N) + If JOBVL = 'V', the left eigenvectors u(j) are stored one + after another in the columns of VL, in the same order + as their eigenvalues. + If JOBVL = 'N', VL is not referenced. + If the j-th eigenvalue is real, then u(j) = VL(:,j), + the j-th column of VL. + If the j-th and (j+1)-st eigenvalues form a complex + conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and + u(j+1) = VL(:,j) - i*VL(:,j+1). + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1; if + JOBVL = 'V', LDVL >= N. + + VR (output) DOUBLE PRECISION array, dimension (LDVR,N) + If JOBVR = 'V', the right eigenvectors v(j) are stored one + after another in the columns of VR, in the same order + as their eigenvalues. + If JOBVR = 'N', VR is not referenced. + If the j-th eigenvalue is real, then v(j) = VR(:,j), + the j-th column of VR. + If the j-th and (j+1)-st eigenvalues form a complex + conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and + v(j+1) = VR(:,j) - i*VR(:,j+1). + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1; if + JOBVR = 'V', LDVR >= N. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,3*N), and + if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good + performance, LWORK must generally be larger. 
+ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = i, the QR algorithm failed to compute all the + eigenvalues, and no eigenvectors have been computed; + elements i+1:N of WR and WI contain eigenvalues which + have converged. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -9; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -11; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV. + HSWORK refers to the workspace preferred by DHSEQR, as + calculated below. HSWORK is computed assuming ILO=1 and IHI=N, + the worst case.) +*/ + + minwrk = 1; + if (*info == 0 && (*lwork >= 1 || lquery)) { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, & + c__0, (ftnlen)6, (ftnlen)1); + if (! 
wantvl && ! wantvr) { +/* Computing MAX */ + i__1 = 1, i__2 = *n * 3; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "DHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "EN", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + + hswork; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 2; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "DOR" + "GHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "DHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "SV", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + + hswork; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } + work[1] = (doublereal) maxwrk; + } + if (*lwork < minwrk && ! lquery) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = PRECISION; + smlnum = SAFEMINIMUM; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1. 
/ smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0. && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* + Balance the matrix + (Workspace: need N) +*/ + + ibal = 1; + dgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* + Reduce to upper Hessenberg form + (Workspace: need 3*N, prefer 2*N+N*NB) +*/ + + itau = ibal + *n; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvl) { + +/* + Want left eigenvectors + Copy Householder vectors to VL +*/ + + *(unsigned char *)side = 'L'; + dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* + Generate orthogonal matrix in VL + (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +*/ + + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VL + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vl[vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* + Want left and right eigenvectors + Copy Schur vectors to VR +*/ + + *(unsigned char *)side = 'B'; + dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* + Want right eigenvectors + Copy Householder vectors to VR +*/ + + *(unsigned char *)side = 'R'; + dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* + Generate orthogonal matrix in VR + (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +*/ + + i__1 = *lwork - iwrk + 1; + dorghr_(n, &ilo, &ihi, 
&vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VR + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* + Compute eigenvalues only + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + dhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO > 0 from DHSEQR, then quit */ + + if (*info > 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* + Compute left and/or right eigenvectors + (Workspace: need 4*N) +*/ + + dtrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); + } + + if (wantvl) { + +/* + Undo balancing of left eigenvectors + (Workspace: need N) +*/ + + dgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, + &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1. 
/ dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vl[k + i__ * vl_dim1]; +/* Computing 2nd power */ + d__2 = vl[k + (i__ + 1) * vl_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L10: */ + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], + &cs, &sn, &r__); + drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * + vl_dim1 + 1], &c__1, &cs, &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.; + } +/* L20: */ + } + } + + if (wantvr) { + +/* + Undo balancing of right eigenvectors + (Workspace: need N) +*/ + + dgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, + &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.) { + scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.) { + d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1. 
/ dlapy2_(&d__1, &d__2); + dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + d__1 = vr[k + i__ * vr_dim1]; +/* Computing 2nd power */ + d__2 = vr[k + (i__ + 1) * vr_dim1]; + work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; +/* L30: */ + } + k = idamax_(n, &work[iwrk], &c__1); + dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], + &cs, &sn, &r__); + drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * + vr_dim1 + 1], &c__1, &cs, &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.; + } +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + + 1], &i__2, &ierr); + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + + 1], &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], + n, &ierr); + i__1 = ilo - 1; + dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], + n, &ierr); + } + } + + work[1] = (doublereal) maxwrk; + return 0; + +/* End of DGEEV */ + +} /* dgeev_ */ + +/* Subroutine */ int dgehd2_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DGEHD2 reduces a real general matrix A to upper Hessenberg form H by + an orthogonal similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to DGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= max(1,N). + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the n by n general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the orthogonal matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). 
+ + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEHD2", &i__1); + return 0; + } + + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ + + i__2 = *ihi - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + aii = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + dlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + a[i__ + 1 + i__ * a_dim1] = aii; +/* L10: */ + } + + return 0; + +/* End of DGEHD2 */ + +} /* dgehd2_ */ + +/* Subroutine */ int 
dgehrd_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__; + static doublereal t[4160] /* was [65][64] */; + static integer ib; + static doublereal ei; + static integer nb, nh, nx, iws; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer nbmin, iinfo; + extern /* Subroutine */ int dgehd2_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), dlahrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DGEHRD reduces a real general matrix A to upper Hessenberg form H by + an orthogonal similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to DGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. 
+ 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the N-by-N general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the orthogonal matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to + zero. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). 
+ + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEHRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { + tau[i__] = 0.; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1] = 1.; + return 0; + } + +/* + Determine the block size. 
+ + Computing MIN +*/ + i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + nbmin = 2; + iws = 1; + if (nb > 1 && nb < nh) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code. */ + + iws = *n * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code. + + Computing MAX +*/ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + if (*lwork >= *n * nbmin) { + nb = *lwork / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + i__ = *ilo; + + } else { + +/* Use blocked code */ + + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3,i__4); + +/* + Reduce columns i:i+ib-1 to Hessenberg form, returning the + matrices V and T of the block reflector H = I - V*T*V' + which performs the reduction, and also the matrix Y = A*V*T +*/ + + dlahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + c__65, &work[1], &ldwork); + +/* + Apply the block reflector H to A(1:ihi,i+ib:ihi) from the + right, computing A := A - Y * V'. V(i+ib,ib-1) must be set + to 1. 
+*/ + + ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; + a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.; + i__3 = *ihi - i__ - ib + 1; + dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b2589, & + work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & + c_b2453, &a[(i__ + ib) * a_dim1 + 1], lda); + a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; + +/* + Apply the block reflector H to A(i+1:ihi,i+ib:n) from the + left +*/ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ + i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); +/* L30: */ + } + } + +/* Use unblocked code to reduce the rest of the matrix */ + + dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1] = (doublereal) iws; + + return 0; + +/* End of DGEHRD */ + +} /* dgehrd_ */ + +/* Subroutine */ int dgelq2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DGELQ2 computes an LQ factorization of a real m by n matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n matrix A. 
+ On exit, the elements on and below the diagonal of the array + contain the m by min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) DOUBLE PRECISION array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k) . . . H(2) H(1), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), + and tau in TAU(i). 
+ + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELQ2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1] + , lda, &tau[i__]); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGELQ2 */ + +} /* dgelq2_ */ + +/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
+ integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DGELQF computes an LQ factorization of a real M-by-N matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and below the diagonal of the array + contain the m-by-min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k) . . . H(2) H(1), where k = min(m,n). 
+ + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the LQ factorization of the current block + A(i:i+ib-1,i:n) +*/ + + i__3 = *n - i__ + 1; + dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *n - i__ + 1; + dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGELQF */ + +} /* dgelqf_ */ + +/* Subroutine */ int dgelsd_(integer *m, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * + s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + static integer ie, il, mm; + static doublereal eps, anrm, bnrm; + static integer itau, nlvl, iascl, ibscl; + static doublereal sfmin; + static integer minmn, maxmn, itaup, itauq, mnthr, nwork; + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebrd_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal 
*, doublereal *, integer *, + integer *); + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlalsd_(char *, integer *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, integer *), dlascl_(char *, + integer *, integer *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *), dgeqrf_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal bignum; + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *); + static integer wlalsd; + extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static integer ldwork; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static integer minwrk, maxwrk; + static doublereal smlnum; + static logical lquery; + static integer smlsiz; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DGELSD computes the minimum-norm solution to a real linear least + squares problem: + minimize 2-norm(| b - A*x |) + using the singular value decomposition (SVD) of A. A is an M-by-N + matrix which may be rank-deficient. + + Several right hand side vectors b and solution vectors x can be + handled in a single call; they are stored as the columns of the + M-by-NRHS right hand side matrix B and the N-by-NRHS solution + matrix X. + + The problem is solved in three steps: + (1) Reduce the coefficient matrix A to bidiagonal form with + Householder transformations, reducing the original problem + into a "bidiagonal least squares problem" (BLS) + (2) Solve the BLS using a divide and conquer approach. + (3) Apply back all the Householder tranformations to solve + the original least squares problem. + + The effective rank of A is determined by treating as zero those + singular values which are less than RCOND times the largest singular + value. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + M (input) INTEGER + The number of rows of A. M >= 0. + + N (input) INTEGER + The number of columns of A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrices B and X. NRHS >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, A has been destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). 
+ + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On entry, the M-by-NRHS right hand side matrix B. + On exit, B is overwritten by the N-by-NRHS solution + matrix X. If m >= n and RANK = n, the residual + sum-of-squares for the solution in the i-th column is given + by the sum of squares of elements n+1:m in that column. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,max(M,N)). + + S (output) DOUBLE PRECISION array, dimension (min(M,N)) + The singular values of A in decreasing order. + The condition number of A in the 2-norm = S(1)/S(min(m,n)). + + RCOND (input) DOUBLE PRECISION + RCOND is used to determine the effective rank of A. + Singular values S(i) <= RCOND*S(1) are treated as zero. + If RCOND < 0, machine precision is used instead. + + RANK (output) INTEGER + The effective rank of A, i.e., the number of singular values + which are greater than RCOND*S(1). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK must be at least 1. + The exact minimum amount of workspace needed depends on M, + N and NRHS. As long as LWORK is at least + 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, + if M is greater than or equal to N or + 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, + if M is less than N, the code will execute correctly. + SMLSIZ is returned by ILAENV and is equal to the maximum + size of the subproblems at the bottom of the computation + tree (usually about 25), and + NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) + For good performance, LWORK should generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. 
+ + IWORK (workspace) INTEGER array, dimension (LIWORK) + LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, + where MINMN = MIN( M,N ). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: the algorithm for computing the SVD failed to converge; + if INFO = i, i off-diagonal elements of an intermediate + bidiagonal form did not converge to zero. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input arguments. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + maxmn = max(*m,*n); + mnthr = ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( + ftnlen)1); + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldb < max(1,maxmn)) { + *info = -7; + } + + smlsiz = ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Compute workspace. + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV.) 
+*/ + + minwrk = 1; + minmn = max(1,minmn); +/* Computing MAX */ + i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) / + log(2.)) + 1; + nlvl = max(i__1,0); + + if (*info == 0) { + maxwrk = 0; + mm = *m; + if (*m >= *n && *m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "DORMQR", "LT", + m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); + } + if (*m >= *n) { + +/* + Path 1 - overdetermined or exactly determined. + + Computing MAX +*/ + i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "DGEBRD" + , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "DORMBR", + "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "DORMBR", + "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *n * 9 + (*n << 1) * smlsiz + (*n << 3) * nlvl + *n * * + nrhs + i__1 * i__1; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n * 3 + wlalsd; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2), + i__2 = *n * 3 + wlalsd; + minwrk = max(i__1,i__2); + } + if (*n > *m) { +/* Computing 2nd power */ + i__1 = smlsiz + 1; + wlalsd = *m * 9 + (*m << 1) * smlsiz + (*m << 3) * nlvl + *m * * + nrhs + i__1 * i__1; + if (*n >= mnthr) { + +/* + Path 2a - underdetermined, with many more columns + than rows. 
+*/ + + maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, + &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& + c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * + ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1, ( + ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); + if (*nrhs > 1) { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "DORMLQ", + "LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; + maxwrk = max(i__1,i__2); + } else { + +/* Path 2 - remaining underdetermined cases. */ + + maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "DORMBR" + , "QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR", + "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * 3 + wlalsd; + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2), + i__2 = *m * 3 + wlalsd; + minwrk = max(i__1,i__2); + } + minwrk = min(minwrk,maxwrk); + work[1] = (doublereal) maxwrk; + if (*lwork < minwrk && ! 
lquery) { + *info = -12; + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGELSD", &i__1); + return 0; + } else if (lquery) { + goto L10; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0) { + *rank = 0; + return 0; + } + +/* Get machine parameters. */ + + eps = PRECISION; + sfmin = SAFEMINIMUM; + smlnum = sfmin / eps; + bignum = 1. / smlnum; + dlabad_(&smlnum, &bignum); + +/* Scale A if max entry outside range [SMLNUM,BIGNUM]. */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]); + iascl = 0; + if (anrm > 0. && anrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, + info); + iascl = 1; + } else if (anrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, + info); + iascl = 2; + } else if (anrm == 0.) { + +/* Matrix all zero. Return zero solution. */ + + i__1 = max(*m,*n); + dlaset_("F", &i__1, nrhs, &c_b2467, &c_b2467, &b[b_offset], ldb); + dlaset_("F", &minmn, &c__1, &c_b2467, &c_b2467, &s[1], &c__1); + *rank = 0; + goto L10; + } + +/* Scale B if max entry outside range [SMLNUM,BIGNUM]. */ + + bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); + ibscl = 0; + if (bnrm > 0. && bnrm < smlnum) { + +/* Scale matrix norm up to SMLNUM. */ + + dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 1; + } else if (bnrm > bignum) { + +/* Scale matrix norm down to BIGNUM. */ + + dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, + info); + ibscl = 2; + } + +/* If M < N make sure certain entries of B are zero. */ + + if (*m < *n) { + i__1 = *n - *m; + dlaset_("F", &i__1, nrhs, &c_b2467, &c_b2467, &b[*m + 1 + b_dim1], + ldb); + } + +/* Overdetermined case. */ + + if (*m >= *n) { + +/* Path 1 - overdetermined or exactly determined. 
*/ + + mm = *m; + if (*m >= mnthr) { + +/* Path 1a - overdetermined, with many more rows than columns. */ + + mm = *n; + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R. + (Workspace: need 2*N, prefer N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + +/* + Multiply B by transpose(Q). + (Workspace: need N+NRHS, prefer N+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below R. */ + + if (*n > 1) { + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__1, &i__2, &c_b2467, &c_b2467, &a[a_dim1 + 2], + lda); + } + } + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A. + (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of R. + (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq], + &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb, + rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of R. 
*/ + + i__1 = *lwork - nwork + 1; + dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], & + b[b_offset], ldb, &work[nwork], &i__1, info); + + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( + i__1,*nrhs), i__2 = *n - *m * 3; + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) { + +/* + Path 2a - underdetermined, with many more columns than rows + and sufficient workspace for an efficient algorithm. +*/ + + ldwork = *m; +/* + Computing MAX + Computing MAX +*/ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + + *m + *m * *nrhs; + if (*lwork >= max(i__1,i__2)) { + ldwork = *lda; + } + itau = 1; + nwork = *m + 1; + +/* + Compute A=L*Q. + (Workspace: need 2*M, prefer M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1, + info); + il = nwork; + +/* Copy L to WORK(IL), zeroing out above its diagonal. */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_("U", &i__1, &i__2, &c_b2467, &c_b2467, &work[il + ldwork], + &ldwork); + ie = il + ldwork * *m; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL). + (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq], + &work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors of L. + (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[ + itauq], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. 
*/ + + dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of L. */ + + i__1 = *lwork - nwork + 1; + dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[ + itaup], &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Zero out below first M rows of B. */ + + i__1 = *n - *m; + dlaset_("F", &i__1, nrhs, &c_b2467, &c_b2467, &b[*m + 1 + b_dim1], + ldb); + nwork = itau + *m; + +/* + Multiply transpose(Q) by B. + (Workspace: need M+NRHS, prefer M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[ + b_offset], ldb, &work[nwork], &i__1, info); + + } else { + +/* Path 2 - remaining underdetermined cases. */ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A. + (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__1, info); + +/* + Multiply B by transpose of left bidiagonalizing vectors. + (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + +/* Solve the bidiagonal least squares problem. */ + + dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset], + ldb, rcond, rank, &work[nwork], &iwork[1], info); + if (*info != 0) { + goto L10; + } + +/* Multiply B by right bidiagonalizing vectors of A. */ + + i__1 = *lwork - nwork + 1; + dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup] + , &b[b_offset], ldb, &work[nwork], &i__1, info); + + } + } + +/* Undo scaling. 
*/ + + if (iascl == 1) { + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } else if (iascl == 2) { + dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, + info); + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, info); + } + if (ibscl == 1) { + dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } else if (ibscl == 2) { + dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, + info); + } + +L10: + work[1] = (doublereal) maxwrk; + return 0; + +/* End of DGELSD */ + +} /* dgelsd_ */ + +/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static doublereal aii; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DGEQR2 computes a QR factorization of a real m by n matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n matrix A. 
+ On exit, the elements on and above the diagonal of the array + contain the min(m,n) by n upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). 
+ + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQR2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of DGEQR2 */ + +} /* dgeqr2_ */ + +/* Subroutine */ int dgeqrf_(integer *m, integer *n, doublereal *a, integer * + lda, doublereal *tau, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dgeqr2_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), dlarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer 
*, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DGEQRF computes a QR factorization of a real M-by-N matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(M,N)-by-N upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of min(m,n) elementary reflectors (see Further + Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) DOUBLE PRECISION array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). 
+ + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the QR factorization of the current block + A(i:m,i:i+ib-1) +*/ + + i__3 = *m - i__ + 1; + dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *m - i__ + 1; + dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DGEQRF */ + +} /* dgeqrf_ */ + +/* Subroutine */ int dgesdd_(char *jobz, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *s, doublereal *u, integer *ldu, + doublereal *vt, integer *ldvt, doublereal *work, integer *lwork, + integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, ie, il, ir, iu, blk; + static doublereal dum[1], eps; + static integer ivt, iscl; + static doublereal anrm; + static integer idum[1], ierr, itau; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer 
*); + extern logical lsame_(char *, char *); + static integer chunk, minmn, wrkbl, itaup, itauq, mnthr; + static logical wntqa; + static integer nwork; + static logical wntqn, wntqo, wntqs; + extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal + *, doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, integer *), dgebrd_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + static integer bdspac; + extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + dgeqrf_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *), dlacpy_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *), dorgbr_(char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal bignum; + extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *), dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); 
+ static integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; + static doublereal smlnum; + static logical wntqas, lquery; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DGESDD computes the singular value decomposition (SVD) of a real + M-by-N matrix A, optionally computing the left and right singular + vectors. If singular vectors are desired, it uses a + divide-and-conquer algorithm. + + The SVD is written + + A = U * SIGMA * transpose(V) + + where SIGMA is an M-by-N matrix which is zero except for its + min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and + V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA + are the singular values of A; they are real and non-negative, and + are returned in descending order. The first min(m,n) columns of + U and V are the left and right singular vectors of A. + + Note that the routine returns VT = V**T, not V. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. 
+ + Arguments + ========= + + JOBZ (input) CHARACTER*1 + Specifies options for computing all or part of the matrix U: + = 'A': all M columns of U and all N rows of V**T are + returned in the arrays U and VT; + = 'S': the first min(M,N) columns of U and the first + min(M,N) rows of V**T are returned in the arrays U + and VT; + = 'O': If M >= N, the first N columns of U are overwritten + on the array A and all rows of V**T are returned in + the array VT; + otherwise, all columns of U are returned in the + array U and the first M rows of V**T are overwritten + in the array VT; + = 'N': no columns of U or rows of V**T are computed. + + M (input) INTEGER + The number of rows of the input matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the input matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, + if JOBZ = 'O', A is overwritten with the first N columns + of U (the left singular vectors, stored + columnwise) if M >= N; + A is overwritten with the first M rows + of V**T (the right singular vectors, stored + rowwise) otherwise. + if JOBZ .ne. 'O', the contents of A are destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + S (output) DOUBLE PRECISION array, dimension (min(M,N)) + The singular values of A, sorted so that S(i) >= S(i+1). + + U (output) DOUBLE PRECISION array, dimension (LDU,UCOL) + UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; + UCOL = min(M,N) if JOBZ = 'S'. + If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M + orthogonal matrix U; + if JOBZ = 'S', U contains the first min(M,N) columns of U + (the left singular vectors, stored columnwise); + if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1; if + JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. 
+ + VT (output) DOUBLE PRECISION array, dimension (LDVT,N) + If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the + N-by-N orthogonal matrix V**T; + if JOBZ = 'S', VT contains the first min(M,N) rows of + V**T (the right singular vectors, stored rowwise); + if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1; if + JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; + if JOBZ = 'S', LDVT >= min(M,N). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK; + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + If JOBZ = 'N', + LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)). + If JOBZ = 'O', + LWORK >= 3*min(M,N)*min(M,N) + + max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). + If JOBZ = 'S' or 'A' + LWORK >= 3*min(M,N)*min(M,N) + + max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). + For good performance, LWORK should generally be larger. + If LWORK < 0 but other input arguments are legal, WORK(1) + returns the optimal LWORK. + + IWORK (workspace) INTEGER array, dimension (8*min(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: DBDSDC did not converge, updating process failed. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + mnthr = (integer) (minmn * 11. 
/ 6.); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + minwrk = 1; + maxwrk = 1; + lquery = *lwork == -1; + + if (! (wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV.) +*/ + + if (*info == 0 && *m > 0 && *n > 0) { + if (*m >= *n) { + +/* Compute space needed for DBDSDC */ + + if (wntqn) { + bdspac = *n * 7; + } else { + bdspac = *n * 3 * *n + (*n << 2); + } + if (*m >= mnthr) { + if (wntqn) { + +/* Path 1 (M much larger than N, JOBZ='N') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *n; + } else if (wntqo) { + +/* Path 2 (M much larger than N, JOBZ='O') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " 
", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*n << 1) * *n; + minwrk = bdspac + (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M much larger than N, JOBZ='S') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M much larger than N, JOBZ='A') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "DORGQR", + " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = 
max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } + } else { + +/* Path 5 (M at least N, but not much larger) */ + + wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wntqn) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *m, i__2 = *n * *n + bdspac; + minwrk = *n * 3 + max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* 
Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } + } + } else { + +/* Compute space needed for DBDSDC */ + + if (wntqn) { + bdspac = *m * 7; + } else { + bdspac = *m * 3 * *m + (*m << 2); + } + if (*n >= mnthr) { + if (wntqn) { + +/* Path 1t (N much larger than M, JOBZ='N') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *m; + } else if (wntqo) { + +/* Path 2t (N much larger than M, JOBZ='O') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*m << 1) * *m; + minwrk = bdspac + (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N much larger than M, JOBZ='S') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, 
&c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N much larger than M, JOBZ='A') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "DORGLQ", + " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } + } else { + +/* Path 5t (N greater than M, but not much larger) */ + + wrkbl = *m * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wntqn) { +/* Computing MAX */ + 
i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *n, i__2 = *m * *m + bdspac; + minwrk = *m * 3 + max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR" + , "PRT", n, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } + } + } + work[1] = (doublereal) maxwrk; + } + + if (*lwork < minwrk && ! 
lquery) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGESDD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + if (*lwork >= 1) { + work[1] = 1.; + } + return 0; + } + +/* Get machine constants */ + + eps = PRECISION; + smlnum = sqrt(SAFEMINIMUM) / eps; + bignum = 1. / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = dlange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0. && anrm < smlnum) { + iscl = 1; + dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* + A has at least as many rows as columns. If A has sufficiently + more rows than columns, first reduce using the QR + decomposition (if sufficient workspace available) +*/ + + if (*m >= mnthr) { + + if (wntqn) { + +/* + Path 1 (M much larger than N, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need 2*N, prefer N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__1, &i__2, &c_b2467, &c_b2467, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (Workspace: need 4*N, prefer 3*N+2*N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *n; + +/* + Perform bidiagonal SVD, computing singular values only + (Workspace: need N+BDSPAC) +*/ + + dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2 (M 
much larger than N, JOBZ = 'O') + N left singular vectors to be overwritten on A and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is LDWRKR by N */ + + if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { + ldwrkr = *lda; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + dlaset_("L", &i__1, &i__2, &c_b2467, &c_b2467, &work[ir + 1], + &ldwrkr); + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in VT, copying result to WORK(IR) + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* WORK(IU) is N by N */ + + iu = nwork; + nwork = iu + *n * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite WORK(IU) by left singular vectors of R + and VT by right singular vectors of R + (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], n, &work[nwork], &i__1, &ierr); 
+ i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IU), storing result in WORK(IR) and copying to A + (Workspace: need 2*N*N, prefer N*N+M*N) +*/ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + dgemm_("N", "N", &chunk, n, n, &c_b2453, &a[i__ + a_dim1], + lda, &work[iu], n, &c_b2467, &work[ir], &ldwrkr); + dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* + Path 3 (M much larger than N, JOBZ='S') + N left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_("L", &i__2, &i__1, &c_b2467, &c_b2467, &work[ir + 1], + &ldwrkr); + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagoal matrix in U and computing right singular + 
vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of R and VT + by right singular vectors of R + (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IR), storing result in U + (Workspace: need N*N) +*/ + + dlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + dgemm_("N", "N", m, n, n, &c_b2453, &a[a_offset], lda, &work[ + ir], &ldwrkr, &c_b2467, &u[u_offset], ldu); + + } else if (wntqa) { + +/* + Path 4 (M much larger than N, JOBZ='A') + M left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* + Compute A=Q*R, copying result to U + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Generate Q in U + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + i__2 = *lwork - nwork + 1; + dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out other entries */ + + i__2 = *n - 1; + i__1 = *n - 1; + dlaset_("L", &i__2, &i__1, &c_b2467, &c_b2467, &a[a_dim1 + 2], + lda); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__2 = 
*lwork - nwork + 1; + dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite WORK(IU) by left singular vectors of R and VT + by right singular vectors of R + (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in U by left singular vectors of R in + WORK(IU), storing result in A + (Workspace: need N*N) +*/ + + dgemm_("N", "N", m, n, n, &c_b2453, &u[u_offset], ldu, &work[ + iu], &ldwrku, &c_b2467, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else { + +/* + M .LT. 
MNTHR + + Path 5 (M at least N, but not much larger) + Reduce to bidiagonal form without QR decomposition +*/ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Perform bidiagonal SVD, only computing singular values + (Workspace: need N+BDSPAC) +*/ + + dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + nwork = iu + ldwrku * *n; + dlaset_("F", m, n, &c_b2467, &c_b2467, &work[iu], &ldwrku); + } else { + +/* WORK( IU ) is N by N */ + + ldwrku = *n; + nwork = iu + ldwrku * *n; + +/* WORK(IR) is LDWRKR by N */ + + ir = nwork; + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & + vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ + 1], info); + +/* + Overwrite VT by right singular vectors of A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* + Overwrite WORK(IU) by left singular vectors of A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + +/* Copy left singular vectors of A 
from WORK(IU) to A */ + + dlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by left singular vectors of + bidiagonal matrix in WORK(IU), storing result in + WORK(IR) and copying to A + (Workspace: need 2*N*N, prefer N*N+M*N) +*/ + + i__2 = *m; + i__1 = ldwrkr; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + dgemm_("N", "N", &chunk, n, n, &c_b2453, &a[i__ + + a_dim1], lda, &work[iu], &ldwrku, &c_b2467, & + work[ir], &ldwrkr); + dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L20: */ + } + } + + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + dlaset_("F", m, n, &c_b2467, &c_b2467, &u[u_offset], ldu); + dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 3*N, prefer 2*N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + dlaset_("F", m, m, &c_b2467, &c_b2467, &u[u_offset], ldu); + dbdsdc_("U", "I", 
n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + i__1 = *m - *n; + i__2 = *m - *n; + dlaset_("F", &i__1, &i__2, &c_b2467, &c_b2453, &u[*n + 1 + (* + n + 1) * u_dim1], ldu); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } else { + +/* + A has more columns than rows. If A has sufficiently more + columns than rows, first reduce using the LQ decomposition (if + sufficient workspace available) +*/ + + if (*n >= mnthr) { + + if (wntqn) { + +/* + Path 1t (N much larger than M, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need 2*M, prefer M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out above L */ + + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_("U", &i__1, &i__2, &c_b2467, &c_b2467, &a[(a_dim1 << + 1) + 1], lda); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (Workspace: need 4*M, prefer 3*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *m; + +/* + Perform bidiagonal SVD, computing singular values only + (Workspace: need M+BDSPAC) +*/ + + dbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2t (N much larger than M, 
JOBZ='O') + M right singular vectors to be overwritten on A and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* IVT is M by M */ + + il = ivt + *m * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { + +/* WORK(IL) is M by N */ + + ldwrkl = *m; + chunk = *n; + } else { + ldwrkl = *m; + chunk = (*lwork - *m * *m) / *m; + } + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + dlaset_("U", &i__1, &i__2, &c_b2467, &c_b2467, &work[il + + ldwrkl], &ldwrkl); + +/* + Generate Q in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U, and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M+M*M+BDSPAC) +*/ + + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], m, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of L and WORK(IVT) + by right singular vectors of L + (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", 
"T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by Q + in A, storing result in WORK(IL) and copying to A + (Workspace: need 2*M*M, prefer M*M+M*N) +*/ + + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + dgemm_("N", "N", m, &blk, m, &c_b2453, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b2467, &work[il], & + ldwrkl); + dlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L30: */ + } + + } else if (wntqs) { + +/* + Path 3t (N much larger than M, JOBZ='S') + M right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_("U", &i__2, &i__1, &c_b2467, &c_b2467, &work[il + + ldwrkl], &ldwrkl); + +/* + Generate Q in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IU), copying result to U + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal 
matrix in VT + (Workspace: need M+BDSPAC) +*/ + + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of L and VT + by right singular vectors of L + (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IL) by + Q in A, storing result in VT + (Workspace: need M*M) +*/ + + dlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + dgemm_("N", "N", m, n, m, &c_b2453, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b2467, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* + Path 4t (N much larger than M, JOBZ='A') + N right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* + Compute A=L*Q, copying result to VT + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Generate Q in VT + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__2, &ierr); + +/* Produce L in A, zeroing out other entries */ + + i__2 = *m - 1; + i__1 = *m - 1; + dlaset_("U", &i__2, &i__1, &c_b2467, &c_b2467, &a[(a_dim1 << + 1) + 1], lda); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + 
i__2 = *lwork - nwork + 1; + dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M+M*M+BDSPAC) +*/ + + dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* + Overwrite U by left singular vectors of L and WORK(IVT) + by right singular vectors of L + (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by + Q in VT, storing result in A + (Workspace: need M*M) +*/ + + dgemm_("N", "N", m, n, m, &c_b2453, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b2467, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else { + +/* + N .LT. 
MNTHR + + Path 5t (N greater than M, but not much larger) + Reduce to bidiagonal form without LQ decomposition +*/ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +*/ + + i__2 = *lwork - nwork + 1; + dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Perform bidiagonal SVD, only computing singular values + (Workspace: need M+BDSPAC) +*/ + + dbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* WORK( IVT ) is M by N */ + + dlaset_("F", m, n, &c_b2467, &c_b2467, &work[ivt], & + ldwkvt); + nwork = ivt + ldwkvt * *n; + } else { + +/* WORK( IVT ) is M by M */ + + nwork = ivt + ldwkvt * *m; + il = nwork; + +/* WORK(IL) is M by CHUNK */ + + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M*M+BDSPAC) +*/ + + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* + Overwrite U by left singular vectors of A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* + Overwrite WORK(IVT) by left singular vectors of A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + +/* Copy right singular vectors of A from WORK(IVT) to A */ + + dlacpy_("F", m, 
n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* + Generate P**T in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by right singular vectors of + bidiagonal matrix in WORK(IVT), storing result in + WORK(IL) and copying to A + (Workspace: need 2*M*M, prefer M*M+M*N) +*/ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + dgemm_("N", "N", m, &blk, m, &c_b2453, &work[ivt], & + ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b2467, & + work[il], m); + dlacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + } + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + dlaset_("F", m, n, &c_b2467, &c_b2467, &vt[vt_offset], ldvt); + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 3*M, prefer 2*M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + dlaset_("F", n, n, &c_b2467, &c_b2467, &vt[vt_offset], ldvt); + dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, 
&vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of VT to identity matrix */ + + i__1 = *n - *m; + i__2 = *n - *m; + dlaset_("F", &i__1, &i__2, &c_b2467, &c_b2453, &vt[*m + 1 + (* + m + 1) * vt_dim1], ldvt); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 2*M+N, prefer 2*M+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (anrm < smlnum) { + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (doublereal) maxwrk; + + return 0; + +/* End of DGESDD */ + +} /* dgesdd_ */ + +/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer + *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *), dgetrs_(char *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DGESV computes the solution to a real system of linear equations + A * X = B, + where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + + The LU decomposition with partial pivoting and row interchanges is + used to factor A as + A = P * L * U, + where P is a permutation matrix, L is unit lower triangular, and U is + upper triangular. The factored form of A is then used to solve the + system of equations A * X = B. + + Arguments + ========= + + N (input) INTEGER + The number of linear equations, i.e., the order of the + matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the N-by-N coefficient matrix A. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (output) INTEGER array, dimension (N) + The pivot indices that define the permutation matrix P; + row i of the matrix was interchanged with row IPIV(i). + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On entry, the N-by-NRHS matrix of right hand side matrix B. + On exit, if INFO = 0, the N-by-NRHS solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, so the solution could not be computed. + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGESV ", &i__1); + return 0; + } + +/* Compute the LU factorization of A. */ + + dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of DGESV */ + +} /* dgesv_ */ + +/* Subroutine */ int dgetf2_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + static integer j, jp; + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *), dscal_(integer *, doublereal *, doublereal *, integer + *), dswap_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1992 + + + Purpose + ======= + + DGETF2 computes an LU factorization of a general m-by-n matrix A + using partial pivoting with row interchanges. 
+ + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 2 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, U(k,k) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + if (a[jp + j * a_dim1] != 0.) 
{ + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. */ + + if (j < *m) { + i__2 = *m - j; + d__1 = 1. / a[j + j * a_dim1]; + dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < min(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + dger_(&i__2, &i__3, &c_b2589, &a[j + 1 + j * a_dim1], &c__1, &a[j + + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], + lda); + } +/* L10: */ + } + return 0; + +/* End of DGETF2 */ + +} /* dgetf2_ */ + +/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer * + lda, integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + static integer i__, j, jb, nb; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer iinfo; + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), dgetf2_( + integer *, integer *, doublereal *, integer *, integer *, integer + *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, + integer *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DGETRF computes an LU factorization of a general M-by-N matrix A + using partial pivoting with row interchanges. 
+ + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 3 BLAS version of the algorithm. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the M-by-N matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. 
*/ + + dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* + Factor diagonal and subdiagonal blocks and test for exact + singularity. +*/ + + i__3 = *m - j + 1; + dgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b2453, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. 
*/ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + dgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &c_b2589, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b2453, &a[j + jb + (j + + jb) * a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of DGETRF */ + +} /* dgetrf_ */ + +/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs, + doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer * + ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), xerbla_( + char *, integer *), dlaswp_(integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DGETRS solves a system of linear equations + A * X = B or A' * X = B + with a general N-by-N matrix A using the LU factorization computed + by DGETRF. + + Arguments + ========= + + TRANS (input) CHARACTER*1 + Specifies the form of the system of equations: + = 'N': A * X = B (No transpose) + = 'T': A'* X = B (Transpose) + = 'C': A'* X = B (Conjugate transpose = Transpose) + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The factors L and U from the factorization A = P*L*U + as computed by DGETRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). 
+ + IPIV (input) INTEGER array, dimension (N) + The pivot indices from DGETRF; for 1<=i<=N, row i of the + matrix was interchanged with row IPIV(i). + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DGETRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* + Solve A * X = B. + + Apply row interchanges to the right hand sides. +*/ + + dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b2453, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b2453, + &a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A' * X = B. + + Solve U'*X = B, overwriting B with X. +*/ + + dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b2453, & + a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. 
*/ + + dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b2453, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. */ + + dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of DGETRS */ + +} /* dgetrs_ */ + +/* Subroutine */ int dhseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, + doublereal *wi, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4, + i__5; + doublereal d__1, d__2; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__, j, k, l; + static doublereal s[225] /* was [15][15] */, v[16]; + static integer i1, i2, ii, nh, nr, ns, nv; + static doublereal vv[16]; + static integer itn; + static doublereal tau; + static integer its; + static doublereal ulp, tst1; + static integer maxb; + static doublereal absw; + static integer ierr; + static doublereal unfl, temp, ovfl; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static integer itemp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static logical initz, wantt, wantz; + extern doublereal dlapy2_(doublereal *, doublereal *); + extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); + + extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *); + extern integer idamax_(integer *, doublereal *, integer *); + extern doublereal 
dlanhs_(char *, integer *, doublereal *, integer *, + doublereal *); + extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *), dlaset_(char *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *), dlarfx_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *); + static doublereal smlnum; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DHSEQR computes the eigenvalues of a real upper Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur + form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal matrix Q, + so that this routine can give the Schur factorization of a matrix A + which has been reduced to the Hessenberg form H by the orthogonal + matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an orthogonal matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N >= 0. 
+ + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to DGEBAL, and then passed to SGEHRD + when the matrix output by DGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if JOB = 'S', H contains the upper quasi-triangular + matrix T from the Schur decomposition (the Schur form); + 2-by-2 diagonal blocks (corresponding to complex conjugate + pairs of eigenvalues) are returned in standard form, with + H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E', + the contents of H are unspecified on exit. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues. If two eigenvalues are computed as a complex + conjugate pair, they are stored in consecutive elements of + WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and + WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the + same order as on the diagonal of the Schur form returned in + H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 + diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + If COMPZ = 'N': Z is not referenced. + If COMPZ = 'I': on entry, Z need not be set, and on exit, Z + contains the orthogonal matrix Z of the Schur vectors of H. + If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, + which is assumed to be equal to the unit matrix except for + the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. 
+ Normally Q is the orthogonal matrix generated by DORGHR after + the call to DGEHRD which formed the Hessenberg matrix H. + + LDZ (input) INTEGER + The leading dimension of the array Z. + LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, DHSEQR failed to compute all of the + eigenvalues in a total of 30*(IHI-ILO+1) iterations; + elements 1:ilo-1 and i+1:n of WR and WI contain those + eigenvalues which have been successfully computed. + + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantt = lsame_(job, "S"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + + *info = 0; + work[1] = (doublereal) max(1,*n); + lquery = *lwork == -1; + if (! lsame_(job, "E") && ! wantt) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1,*n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { + *info = -11; + } else if (*lwork < max(1,*n) && ! 
lquery) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DHSEQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Initialize Z, if necessary */ + + if (initz) { + dlaset_("Full", n, n, &c_b2467, &c_b2453, &z__[z_offset], ldz); + } + +/* Store the eigenvalues isolated by DGEBAL. */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; +/* L10: */ + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; +/* L20: */ + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + +/* + Set rows and columns ILO to IHI to zero below the first + subdiagonal. +*/ + + i__1 = *ihi - 2; + for (j = *ilo; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j + 2; i__ <= i__2; ++i__) { + h__[i__ + j * h_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + nh = *ihi - *ilo + 1; + +/* + Determine the order of the multi-shift QR algorithm to be used. + + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = job; + i__3[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + ns = ilaenv_(&c__4, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = job; + i__3[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + maxb = ilaenv_(&c__8, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (ns <= 2 || ns > nh || maxb >= nh) { + +/* Use the standard double-shift algorithm */ + + dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ + 1], ilo, ihi, &z__[z_offset], ldz, info); + return 0; + } + maxb = max(3,maxb); +/* Computing MIN */ + i__1 = min(ns,maxb); + ns = min(i__1,15); + +/* + Now 2 < NS <= MAXB < NH. + + Set machine-dependent constants for the stopping criterion. 
+ If norm(H) <= sqrt(OVFL), overflow should not occur. +*/ + + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; + smlnum = unfl * (nh / ulp); + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (wantt) { + i1 = 1; + i2 = *n; + } + +/* ITN is the total number of multiple-shift QR iterations allowed. */ + + itn = nh * 30; + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of at most MAXB. Each iteration of the loop + works with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L50: + l = *ilo; + if (i__ < *ilo) { + goto L170; + } + +/* + Perform multiple-shift QR iterations on rows and columns ILO to I + until a submatrix of order at most MAXB splits off at the bottom + because a subdiagonal element has become negligible. +*/ + + i__1 = itn; + for (its = 0; its <= i__1; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[k + k * h_dim1], abs(d__2)); + if (tst1 == 0.) { + i__4 = i__ - l + 1; + tst1 = dlanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1] + ); + } +/* Computing MAX */ + d__2 = ulp * tst1; + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, + smlnum)) { + goto L70; + } +/* L60: */ + } +L70: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible. */ + + h__[l + (l - 1) * h_dim1] = 0.; + } + +/* Exit from loop if a submatrix of order <= MAXB has split off. */ + + if (l >= i__ - maxb + 1) { + goto L160; + } + +/* + Now the active submatrix is in rows and columns L to I. 
If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! wantt) { + i1 = l; + i2 = i__; + } + + if (its == 20 || its == 30) { + +/* Exceptional shifts. */ + + i__2 = i__; + for (ii = i__ - ns + 1; ii <= i__2; ++ii) { + wr[ii] = ((d__1 = h__[ii + (ii - 1) * h_dim1], abs(d__1)) + ( + d__2 = h__[ii + ii * h_dim1], abs(d__2))) * 1.5; + wi[ii] = 0.; +/* L80: */ + } + } else { + +/* Use eigenvalues of trailing submatrix of order NS as shifts. */ + + dlacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * + h_dim1], ldh, s, &c__15); + dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ - + ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset], + ldz, &ierr); + if (ierr > 0) { + +/* + If DLAHQR failed to compute all NS eigenvalues, use the + unconverged diagonal elements as the remaining shifts. +*/ + + i__2 = ierr; + for (ii = 1; ii <= i__2; ++ii) { + wr[i__ - ns + ii] = s[ii + ii * 15 - 16]; + wi[i__ - ns + ii] = 0.; +/* L90: */ + } + } + } + +/* + Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) + where G is the Hessenberg submatrix H(L:I,L:I) and w is + the vector of shifts (stored in WR and WI). The result is + stored in the local array V. +*/ + + v[0] = 1.; + i__2 = ns + 1; + for (ii = 2; ii <= i__2; ++ii) { + v[ii - 1] = 0.; +/* L100: */ + } + nv = 1; + i__2 = i__; + for (j = i__ - ns + 1; j <= i__2; ++j) { + if (wi[j] >= 0.) { + if (wi[j] == 0.) { + +/* real shift */ + + i__4 = nv + 1; + dcopy_(&i__4, v, &c__1, vv, &c__1); + i__4 = nv + 1; + d__1 = -wr[j]; + dgemv_("No transpose", &i__4, &nv, &c_b2453, &h__[l + l * + h_dim1], ldh, vv, &c__1, &d__1, v, &c__1); + ++nv; + } else if (wi[j] > 0.) 
{ + +/* complex conjugate pair of shifts */ + + i__4 = nv + 1; + dcopy_(&i__4, v, &c__1, vv, &c__1); + i__4 = nv + 1; + d__1 = wr[j] * -2.; + dgemv_("No transpose", &i__4, &nv, &c_b2453, &h__[l + l * + h_dim1], ldh, v, &c__1, &d__1, vv, &c__1); + i__4 = nv + 1; + itemp = idamax_(&i__4, vv, &c__1); +/* Computing MAX */ + d__2 = (d__1 = vv[itemp - 1], abs(d__1)); + temp = 1. / max(d__2,smlnum); + i__4 = nv + 1; + dscal_(&i__4, &temp, vv, &c__1); + absw = dlapy2_(&wr[j], &wi[j]); + temp = temp * absw * absw; + i__4 = nv + 2; + i__5 = nv + 1; + dgemv_("No transpose", &i__4, &i__5, &c_b2453, &h__[l + l + * h_dim1], ldh, vv, &c__1, &temp, v, &c__1); + nv += 2; + } + +/* + Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, + reset it to the unit vector. +*/ + + itemp = idamax_(&nv, v, &c__1); + temp = (d__1 = v[itemp - 1], abs(d__1)); + if (temp == 0.) { + v[0] = 1.; + i__4 = nv; + for (ii = 2; ii <= i__4; ++ii) { + v[ii - 1] = 0.; +/* L110: */ + } + } else { + temp = max(temp,smlnum); + d__1 = 1. / temp; + dscal_(&nv, &d__1, v, &c__1); + } + } +/* L120: */ + } + +/* Multiple-shift QR step */ + + i__2 = i__ - 1; + for (k = l; k <= i__2; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. 
+ + Computing MIN +*/ + i__4 = ns + 1, i__5 = i__ - k + 1; + nr = min(i__4,i__5); + if (k > l) { + dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + dlarfg_(&nr, v, &v[1], &c__1, &tau); + if (k > l) { + h__[k + (k - 1) * h_dim1] = v[0]; + i__4 = i__; + for (ii = k + 1; ii <= i__4; ++ii) { + h__[ii + (k - 1) * h_dim1] = 0.; +/* L130: */ + } + } + v[0] = 1.; + +/* + Apply G from the left to transform the rows of the matrix in + columns K to I2. +*/ + + i__4 = i2 - k + 1; + dlarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, & + work[1]); + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+NR,I). + + Computing MIN +*/ + i__5 = k + nr; + i__4 = min(i__5,i__) - i1 + 1; + dlarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, + &work[1]); + + if (wantz) { + +/* Accumulate transformations in the matrix Z */ + + dlarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], + ldz, &work[1]); + } +/* L140: */ + } + +/* L150: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L160: + +/* + A submatrix of order <= MAXB in rows and columns L to I has split + off. Use the double-shift QR algorithm to handle it. +*/ + + dlahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1], + ilo, ihi, &z__[z_offset], ldz, info); + if (*info > 0) { + return 0; + } + +/* + Decrement number of remaining iterations, and return to start of + the main loop with a new value of I. +*/ + + itn -= its; + i__ = l - 1; + goto L50; + +L170: + work[1] = (doublereal) max(1,*n); + return 0; + +/* End of DHSEQR */ + +} /* dhseqr_ */ + +/* Subroutine */ int dlabad_(doublereal *small, doublereal *large) +{ + /* Builtin functions */ + double d_lg10(doublereal *), sqrt(doublereal); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLABAD takes as input the values computed by DLAMCH for underflow and + overflow, and returns the square root of each of these values if the + log of LARGE is sufficiently large. This subroutine is intended to + identify machines with a large exponent range, such as the Crays, and + redefine the underflow and overflow limits to be the square roots of + the values computed by DLAMCH. This subroutine is needed because + DLAMCH does not compensate for poor arithmetic in the upper half of + the exponent range, as is found on a Cray. + + Arguments + ========= + + SMALL (input/output) DOUBLE PRECISION + On entry, the underflow threshold as computed by DLAMCH. + On exit, if LOG10(LARGE) is sufficiently large, the square + root of SMALL, otherwise unchanged. + + LARGE (input/output) DOUBLE PRECISION + On entry, the overflow threshold as computed by DLAMCH. + On exit, if LOG10(LARGE) is sufficiently large, the square + root of LARGE, otherwise unchanged. + + ===================================================================== + + + If it looks like we're on a Cray, take the square root of + SMALL and LARGE to avoid overflow and underflow problems. 
+*/ + + if (d_lg10(large) > 2e3) { + *small = sqrt(*small); + *large = sqrt(*large); + } + + return 0; + +/* End of DLABAD */ + +} /* dlabad_ */ + +/* Subroutine */ int dlabrd_(integer *m, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq, + doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer + *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemv_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLABRD reduces the first NB rows and columns of a real general + m by n matrix A to upper or lower bidiagonal form by an orthogonal + transformation Q' * A * P, and returns the matrices X and Y which + are needed to apply the transformation to the unreduced part of A. + + If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + bidiagonal form. + + This is an auxiliary routine called by DGEBRD + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. + + N (input) INTEGER + The number of columns in the matrix A. + + NB (input) INTEGER + The number of leading rows and columns of A to be reduced. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, the first NB rows and columns of the matrix are + overwritten; the rest of the array is unchanged. 
+ If m >= n, elements on and below the diagonal in the first NB + columns, with the array TAUQ, represent the orthogonal + matrix Q as a product of elementary reflectors; and + elements above the diagonal in the first NB rows, with the + array TAUP, represent the orthogonal matrix P as a product + of elementary reflectors. + If m < n, elements below the diagonal in the first NB + columns, with the array TAUQ, represent the orthogonal + matrix Q as a product of elementary reflectors, and + elements on and above the diagonal in the first NB rows, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) DOUBLE PRECISION array, dimension (NB) + The diagonal elements of the first NB rows and columns of + the reduced matrix. D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (NB) + The off-diagonal elements of the first NB rows and columns of + the reduced matrix. + + TAUQ (output) DOUBLE PRECISION array dimension (NB) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) DOUBLE PRECISION array, dimension (NB) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + X (output) DOUBLE PRECISION array, dimension (LDX,NB) + The m-by-nb matrix X required to update the unreduced part + of A. + + LDX (input) INTEGER + The leading dimension of the array X. LDX >= M. + + Y (output) DOUBLE PRECISION array, dimension (LDY,NB) + The n-by-nb matrix Y required to update the unreduced part + of A. + + LDY (output) INTEGER + The leading dimension of the array Y. LDY >= N. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . 
G(nb) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors. + + If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in + A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The elements of the vectors v and u together form the m-by-nb matrix + V and the nb-by-n matrix U' which are needed, with X and Y, to apply + the transformation to the unreduced part of the matrix, using a block + update of the form: A := A - V*Y' - X*U'. + + The contents of A on exit are illustrated by the following examples + with nb = 2: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) + ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) + ( v1 v2 a a a ) ( v1 1 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix which is unchanged, + vi denotes an element of the vector defining H(i), and ui an element + of the vector defining G(i). 
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:m,i) */ + + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[i__ + a_dim1], + lda, &y[i__ + y_dim1], ldy, &c_b2453, &a[i__ + i__ * + a_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &x[i__ + x_dim1], + ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b2453, &a[i__ + i__ * + a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[i__ + (i__ + 1) + * a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, & + c_b2467, &y[i__ + 1 + i__ * y_dim1], &c__1) + ; + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[i__ + a_dim1], + lda, &a[i__ + i__ * a_dim1], &c__1, &c_b2467, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b2453, & + y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b2453, &x[i__ + x_dim1], + ldx, &a[i__ + i__ * a_dim1], &c__1, 
&c_b2467, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b2589, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b2453, &y[i__ + 1 + i__ * y_dim1], &c__1) + ; + i__2 = *n - i__; + dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + +/* Update A(i,i+1:n) */ + + i__2 = *n - i__; + dgemv_("No transpose", &i__2, &i__, &c_b2589, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b2453, &a[i__ + + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b2589, &a[(i__ + 1) * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b2453, &a[ + i__ + (i__ + 1) * a_dim1], lda); + +/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b2453, &a[i__ + 1 + ( + i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], + lda, &c_b2467, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + dgemv_("Transpose", &i__2, &i__, &c_b2453, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b2467, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + dgemv_("No transpose", &i__2, &i__, &c_b2589, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b2453, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b2453, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b2467, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b2453, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - 
i__; + dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i,i:n) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &y[i__ + y_dim1], + ldy, &a[i__ + a_dim1], lda, &c_b2453, &a[i__ + i__ * + a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + dgemv_("Transpose", &i__2, &i__3, &c_b2589, &a[i__ * a_dim1 + 1], + lda, &x[i__ + x_dim1], ldx, &c_b2453, &a[i__ + i__ * + a_dim1], lda); + +/* Generate reflection P(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2453, &a[i__ + 1 + + i__ * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, & + c_b2467, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b2453, &y[i__ + y_dim1], + ldy, &a[i__ + i__ * a_dim1], lda, &c_b2467, &x[i__ * + x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b2453, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2453, &a[i__ * + a_dim1 + 1], lda, &a[i__ + i__ * a_dim1], lda, & + c_b2467, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b2453, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + +/* Update A(i+1:m,i) */ + + i__2 = 
*m - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b2453, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *m - i__; + dgemv_("No transpose", &i__2, &i__, &c_b2589, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b2453, & + a[i__ + 1 + i__ * a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[i__ + 1 + (i__ + + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], & + c__1, &c_b2467, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[i__ + 1 + + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b2467, &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b2453, & + y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + dgemv_("Transpose", &i__2, &i__, &c_b2453, &x[i__ + 1 + + x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b2467, &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + dgemv_("Transpose", &i__, &i__2, &c_b2589, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b2453, &y[i__ + 1 + i__ * y_dim1], &c__1) + ; + i__2 = *n - i__; + dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + } +/* L20: */ + } + } + return 0; + +/* End of DLABRD */ + +} /* dlabrd_ */ + +/* Subroutine */ int dlacpy_(char *uplo, integer *m, integer *n, doublereal * + a, integer *lda, doublereal *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + 
+ /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLACPY copies all or part of a two-dimensional matrix A to another + matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper triangle + or trapezoid is accessed; if UPLO = 'L', only the lower + triangle or trapezoid is accessed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) DOUBLE PRECISION array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). 
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    if (lsame_(uplo, "U")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = min(j,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else if (lsame_(uplo, "L")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
+/* L30: */
+	    }
+/* L40: */
+	}
+    } else {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
+/* L50: */
+	    }
+/* L60: */
+	}
+    }
+    return 0;
+
+/* End of DLACPY */
+
+} /* dlacpy_ */
+
+/* Subroutine */ int dladiv_(doublereal *a, doublereal *b, doublereal *c__,
+	doublereal *d__, doublereal *p, doublereal *q)
+{
+    static doublereal e, f;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLADIV performs complex division in real arithmetic
+
+                          a + i*b
+               p + i*q = ---------
+                          c + i*d
+
+    The algorithm is due to Robert L. Smith and can be found
+    in D. Knuth, The art of Computer Programming, Vol.2, p.195
+
+    Arguments
+    =========
+
+    A       (input) DOUBLE PRECISION
+    B       (input) DOUBLE PRECISION
+    C       (input) DOUBLE PRECISION
+    D       (input) DOUBLE PRECISION
+            The scalars a, b, c, and d in the above expression.
+
+    P       (output) DOUBLE PRECISION
+    Q       (output) DOUBLE PRECISION
+            The scalars p and q in the above expression.
+
+    =====================================================================
+*/
+
+
+/*
+    Smith's algorithm: divide through by whichever of c or d has the
+    larger magnitude, so the intermediate ratio e satisfies |e| <= 1
+    and the computation avoids premature overflow/underflow.
+*/
+    if (abs(*d__) < abs(*c__)) {
+	/* |d| < |c|: form e = d/c and scale numerator/denominator by c */
+	e = *d__ / *c__;
+	f = *c__ + *d__ * e;
+	*p = (*a + *b * e) / f;
+	*q = (*b - *a * e) / f;
+    } else {
+	/* |c| <= |d|: form e = c/d and scale numerator/denominator by d */
+	e = *c__ / *d__;
+	f = *d__ + *c__ * e;
+	*p = (*b + *a * e) / f;
+	*q = (-(*a) + *b * e) / f;
+    }
+
+    return 0;
+
+/* End of DLADIV */
+
+} /* dladiv_ */
+
+/* Subroutine */ int dlae2_(doublereal *a, doublereal *b, doublereal *c__,
+	doublereal *rt1, doublereal *rt2)
+{
+    /* System generated locals */
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal ab, df, tb, sm, rt, adf, acmn, acmx;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLAE2  computes the eigenvalues of a 2-by-2 symmetric matrix
+       [  A   B  ]
+       [  B   C  ].
+    On return, RT1 is the eigenvalue of larger absolute value, and RT2
+    is the eigenvalue of smaller absolute value.
+
+    Arguments
+    =========
+
+    A       (input) DOUBLE PRECISION
+            The (1,1) element of the 2-by-2 matrix.
+
+    B       (input) DOUBLE PRECISION
+            The (1,2) and (2,1) elements of the 2-by-2 matrix.
+
+    C       (input) DOUBLE PRECISION
+            The (2,2) element of the 2-by-2 matrix.
+
+    RT1     (output) DOUBLE PRECISION
+            The eigenvalue of larger absolute value.
+
+    RT2     (output) DOUBLE PRECISION
+            The eigenvalue of smaller absolute value.
+
+    Further Details
+    ===============
+
+    RT1 is accurate to a few ulps barring over/underflow.
+
+    RT2 may be inaccurate if there is massive cancellation in the
+    determinant A*C-B*B; higher precision or correctly rounded or
+    correctly truncated arithmetic would be needed to compute RT2
+    accurately in all cases.
+
+    Overflow is possible only if RT1 is within a factor of 5 of overflow.
+    Underflow is harmless if the input data is 0 or exceeds
+       underflow_threshold / macheps.
+ + ===================================================================== + + + Compute the eigenvalues +*/ + + sm = *a + *c__; + df = *a - *c__; + adf = abs(df); + tb = *b + *b; + ab = abs(tb); + if (abs(*a) > abs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); + } else if (adf < ab) { +/* Computing 2nd power */ + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.); + } + if (sm < 0.) { + *rt1 = (sm - rt) * .5; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5; + *rt2 = rt * -.5; + } + return 0; + +/* End of DLAE2 */ + +} /* dlae2_ */ + +/* Subroutine */ int dlaed0_(integer *icompq, integer *qsiz, integer *n, + doublereal *d__, doublereal *e, doublereal *q, integer *ldq, + doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; + static doublereal temp; + static integer curr; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer iperm; + extern /* 
Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer indxq, iwrem; + extern /* Subroutine */ int dlaed1_(integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *); + static integer iqptr; + extern /* Subroutine */ int dlaed7_(integer *, integer *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, doublereal *, integer *, integer *); + static integer tlvls; + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *); + static integer igivcl; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer igivnm, submat, curprb, subpbs, igivpt; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + static integer curlvl, matsiz, iprmpt, smlsiz; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLAED0 computes all eigenvalues and corresponding eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + = 2: Compute eigenvalues and eigenvectors of tridiagonal + matrix. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. 
QSIZ >= N if ICOMPQ = 1. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the main diagonal of the tridiagonal matrix. + On exit, its eigenvalues. + + E (input) DOUBLE PRECISION array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) + On entry, Q must contain an N-by-N orthogonal matrix. + If ICOMPQ = 0 Q is not referenced. + If ICOMPQ = 1 On entry, Q is a subset of the columns of the + orthogonal matrix used to reduce the full + matrix to tridiagonal form corresponding to + the subset of the full matrix which is being + decomposed at this time. + If ICOMPQ = 2 On entry, Q will be the identity matrix. + On exit, Q contains the eigenvectors of the + tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. If eigenvectors are + desired, then LDQ >= max(1,N). In any case, LDQ >= 1. + + QSTORE (workspace) DOUBLE PRECISION array, dimension (LDQS, N) + Referenced only when ICOMPQ = 1. Used to store parts of + the eigenvector matrix when the updating matrix multiplies + take place. + + LDQS (input) INTEGER + The leading dimension of the array QSTORE. If ICOMPQ = 1, + then LDQS >= max(1,N). In any case, LDQS >= 1. + + WORK (workspace) DOUBLE PRECISION array, + If ICOMPQ = 0 or 1, the dimension of WORK must be at least + 1 + 3*N + 2*N*lg N + 2*N**2 + ( lg( N ) = smallest integer k + such that 2^k >= N ) + If ICOMPQ = 2, the dimension of WORK must be at least + 4*N + N**2. + + IWORK (workspace) INTEGER array, + If ICOMPQ = 0 or 1, the dimension of IWORK must be at least + 6 + 6*N + 5*N*lg N. + ( lg( N ) = smallest integer k + such that 2^k >= N ) + If ICOMPQ = 2, the dimension of IWORK must be at least + 3 + 5*N. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. 
+ > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 2) { + *info = -1; + } else if (*icompq == 1 && *qsiz < max(0,*n)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*ldqs < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED0", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + smlsiz = ilaenv_(&c__9, "DLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Determine the size and placement of the submatrices, and save in + the leading elements of IWORK. +*/ + + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: */ + } + +/* + Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 + using rank-1 modifications (cuts). 
+*/ + + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (d__1 = e[smm1], abs(d__1)); + d__[submat] -= (d__1 = e[smm1], abs(d__1)); +/* L40: */ + } + + indxq = (*n << 2) + 3; + if (*icompq != 2) { + +/* + Set up workspaces for eigenvalues only/accumulate new vectors + routine +*/ + + temp = log((doublereal) (*n)) / log(2.); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; + + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; + +/* Initialize pointers */ + + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; + } + +/* + Solve each submatrix eigenproblem at the bottom of the divide and + conquer tree. 
+*/ + + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; + } + if (*icompq == 2) { + dsteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + + submat * q_dim1], ldq, &work[1], info); + if (*info != 0) { + goto L130; + } + } else { + dsteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + + iwork[iqptr + curr]], &matsiz, &work[1], info); + if (*info != 0) { + goto L130; + } + if (*icompq == 1) { + dgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b2453, &q[submat * + q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]] + , &matsiz, &c_b2467, &qstore[submat * qstore_dim1 + 1] + , ldqs); + } +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: */ + } +/* L70: */ + } + +/* + Successively merge eigensystems of adjacent submatrices + into eigensystem for the corresponding larger matrix. + + while ( SUBPBS > 1 ) +*/ + + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } + +/* + Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) + into an eigensystem of size MATSIZ. + DLAED1 is used only for the full eigensystem of a tridiagonal + matrix. + DLAED7 handles the cases in which eigenvalues only or eigenvalues + and eigenvectors of a full symmetric matrix (which was reduced to + tridiagonal form) are desired. 
+*/ + + if (*icompq == 2) { + dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], + ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & + msd2, &work[1], &iwork[subpbs + 1], info); + } else { + dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, & + iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & + work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] + , &iwork[igivpt], &iwork[igivcl], &work[igivnm], & + work[iwrem], &iwork[subpbs + 1], info); + } + if (*info != 0) { + goto L130; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } + +/* + end while + + Re-merge the eigenvalues/vectors which were deflated at the final + merge step. +*/ + + if (*icompq == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + + 1], &c__1); +/* L100: */ + } + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + } else if (*icompq == 2) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); +/* L110: */ + } + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + dlacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; +/* L120: */ + } + dcopy_(n, &work[1], &c__1, &d__[1], &c__1); + } + goto L140; + +L130: + *info = submat * (*n + 1) + submat + matsiz - 1; + +L140: + return 0; + +/* End of DLAED0 */ + +} /* dlaed0_ */ + +/* Subroutine */ int dlaed1_(integer *n, doublereal *d__, doublereal *q, + integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Local variables */ + static integer i__, k, n1, 
n2, is, iw, iz, iq2, zpp1, indx, indxc; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer indxp; + extern /* Subroutine */ int dlaed2_(integer *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *, integer *, integer *, integer *), dlaed3_(integer *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + doublereal *, doublereal *, integer *); + static integer idlmda; + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLAED1 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles + the case in which eigenvalues only or eigenvalues and eigenvectors + of a full symmetric matrix (which was reduced to tridiagonal form) + are desired. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. 
This stage is + performed by the routine DLAED2. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine DLAED4 (as called by DLAED3). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input/output) INTEGER array, dimension (N) + On entry, the permutation which separately sorts the two + subproblems in D into ascending order. + On exit, the permutation which will reintegrate the + subproblems back into sorted order, + i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. + + RHO (input) DOUBLE PRECISION + The subdiagonal entry used to create the rank-1 modification. + + CUTPNT (input) INTEGER + The location of the last eigenvalue in the leading sub-matrix. + min(1,N) <= CUTPNT <= N/2. + + WORK (workspace) DOUBLE PRECISION array, dimension (4*N + N**2) + + IWORK (workspace) INTEGER array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. 
+ > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*ldq < max(1,*n)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { + *info = -7; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED1", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are integer pointers which indicate + the portion of the workspace + used by a particular array in DLAED2 and DLAED3. +*/ + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); + zpp1 = *cutpnt + 1; + i__1 = *n - *cutpnt; + dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); + +/* Deflate eigenvalues. */ + + dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ + iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ + indxc], &iwork[indxp], &iwork[coltyp], info); + + if (*info != 0) { + goto L20; + } + +/* Solve Secular Equation. 
*/ + + if (k != 0) { + is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; + dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], + &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ + is], info); + if (*info != 0) { + goto L20; + } + +/* Prepare the INDXQ sorting permutation. */ + + n1 = k; + n2 = *n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L10: */ + } + } + +L20: + return 0; + +/* End of DLAED1 */ + +} /* dlaed1_ */ + +/* Subroutine */ int dlaed2_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho, + doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2, + integer *indx, integer *indxc, integer *indxp, integer *coltyp, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal c__; + static integer i__, j; + static doublereal s, t; + static integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; + static doublereal eps, tau, tol; + static integer psm[4], imax, jmax; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer ctot[4]; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dcopy_(integer *, doublereal *, integer *, doublereal + *, integer *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. 
of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLAED2 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny entry in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + K (output) INTEGER + The number of non-deflated eigenvalues, and the order of the + related secular equation. 0 <= K <=N. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + N1 (input) INTEGER + The location of the last eigenvalue in the leading sub-matrix. + min(1,N) <= N1 <= N/2. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, D contains the eigenvalues of the two submatrices to + be combined. + On exit, D contains the trailing (N-K) updated eigenvalues + (those which were deflated) sorted into increasing order. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) + On entry, Q contains the eigenvectors of two submatrices in + the two square blocks with corners at (1,1), (N1,N1) + and (N1+1, N1+1), (N,N). + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input/output) INTEGER array, dimension (N) + The permutation which separately sorts the two sub-problems + in D into ascending order. Note that elements in the second + half of this permutation must first have N1 added to their + values. Destroyed on exit. 
+ + RHO (input/output) DOUBLE PRECISION + On entry, the off-diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. + On exit, RHO has been modified to the value required by + DLAED3. + + Z (input) DOUBLE PRECISION array, dimension (N) + On entry, Z contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). + On exit, the contents of Z have been destroyed by the updating + process. + + DLAMDA (output) DOUBLE PRECISION array, dimension (N) + A copy of the first K eigenvalues which will be used by + DLAED3 to form the secular equation. + + W (output) DOUBLE PRECISION array, dimension (N) + The first k values of the final deflation-altered z-vector + which will be passed to DLAED3. + + Q2 (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) + A copy of the first K eigenvectors which will be used by + DLAED3 in a matrix multiply (DGEMM) to solve for the new + eigenvectors. + + INDX (workspace) INTEGER array, dimension (N) + The permutation used to sort the contents of DLAMDA into + ascending order. + + INDXC (output) INTEGER array, dimension (N) + The permutation used to arrange the columns of the deflated + Q matrix into three groups: the first group contains non-zero + elements only at and above N1, the second contains + non-zero elements only below N1, and the third is dense. + + INDXP (workspace) INTEGER array, dimension (N) + The permutation used to place deflated values of D at the end + of the array. INDXP(1:K) points to the nondeflated D-values + and INDXP(K+1:N) points to the deflated eigenvalues. + + COLTYP (workspace/output) INTEGER array, dimension (N) + During execution, a label which will indicate which of the + following types a column in the Q2 matrix is: + 1 : non-zero in the upper half only; + 2 : dense; + 3 : non-zero in the lower half only; + 4 : deflated. 
+ On exit, COLTYP(i) is the number of columns of type i, + for i=1 to 4 only. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + --w; + --q2; + --indx; + --indxc; + --indxp; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { + *info = -3; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n2 = *n - *n1; + n1p1 = *n1 + 1; + + if (*rho < 0.) { + dscal_(&n2, &c_b2589, &z__[n1p1], &c__1); + } + +/* + Normalize z so that norm(z) = 1. Since z is the concatenation of + two normalized vectors, norm2(z) = sqrt(2). +*/ + + t = 1. 
/ sqrt(2.); + dscal_(n, &t, &z__[1], &c__1); + +/* RHO = ABS( norm(z)**2 * RHO ) */ + + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + indxq[i__] += *n1; +/* L10: */ + } + +/* re-integrate the deflated parts from the last pass */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; +/* L20: */ + } + dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indx[i__] = indxq[indxc[i__]]; +/* L30: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = idamax_(n, &z__[1], &c__1); + jmax = idamax_(n, &d__[1], &c__1); + eps = EPSILON; +/* Computing MAX */ + d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2)) + ; + tol = eps * 8. * max(d__3,d__4); + +/* + If the rank-1 modifier is small enough, no more needs to be done + except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + iq2 = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__ = indx[j]; + dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + dlamda[j] = d__[i__]; + iq2 += *n; +/* L40: */ + } + dlacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); + dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); + goto L190; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. 
+*/ + + i__1 = *n1; + for (i__ = 1; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L50: */ + } + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + coltyp[i__] = 3; +/* L60: */ + } + + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + nj = indx[j]; + if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + if (j == *n) { + goto L100; + } + } else { + pj = nj; + goto L80; + } +/* L70: */ + } +L80: + ++j; + nj = indx[j]; + if (j > *n) { + goto L100; + } + if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[pj]; + c__ = z__[nj]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(&c__, &s); + t = d__[nj] - d__[pj]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + z__[nj] = tau; + z__[pj] = 0.; + if (coltyp[nj] != coltyp[pj]) { + coltyp[nj] = 2; + } + coltyp[pj] = 4; + drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & + c__, &s); +/* Computing 2nd power */ + d__1 = c__; +/* Computing 2nd power */ + d__2 = s; + t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); +/* Computing 2nd power */ + d__1 = s; +/* Computing 2nd power */ + d__2 = c__; + d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2); + d__[pj] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[pj] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = pj; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = pj; + } + } else { + indxp[k2 + i__ - 1] = pj; + } + pj = nj; + } else { + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + pj = nj; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. 
*/ + + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + +/* + Count up the total number of the various types of columns, then + form a permutation which positions the four column types into + four uniform groups (although one or more of these groups may be + empty). +*/ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L110: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L120: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 1; + psm[1] = ctot[0] + 1; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + *k = *n - ctot[3]; + +/* + Fill out the INDXC array so that the permutation which it induces + will place all type-1 columns first, all type-2 columns next, + then all type-3's, and finally all type-4's. +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + js = indxp[j]; + ct = coltyp[js]; + indx[psm[ct - 1]] = js; + indxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L130: */ + } + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. 
+*/ + + i__ = 1; + iq1 = 1; + iq2 = (ctot[0] + ctot[1]) * *n1 + 1; + i__1 = ctot[0]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; +/* L140: */ + } + + i__1 = ctot[1]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; + iq2 += n2; +/* L150: */ + } + + i__1 = ctot[2]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq2 += n2; +/* L160: */ + } + + iq1 = iq2; + i__1 = ctot[3]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + iq2 += *n; + z__[i__] = d__[js]; + ++i__; +/* L170: */ + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. +*/ + + dlacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); + i__1 = *n - *k; + dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* Copy CTOT into COLTYP for referencing in DLAED3. 
*/ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L180: */ + } + +L190: + return 0; + +/* End of DLAED2 */ + +} /* dlaed2_ */ + +/* Subroutine */ int dlaed3_(integer *k, integer *n, integer *n1, doublereal * + d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda, + doublereal *q2, integer *indx, integer *ctot, doublereal *w, + doublereal *s, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer i__, j, n2, n12, ii, n23, iq2; + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer + *), dlaed4_(integer *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + DLAED3 finds the roots of the secular equation, as defined by the + values in D, W, and RHO, between 1 and K. It makes the + appropriate calls to DLAED4 and then updates the eigenvectors by + multiplying the matrix of eigenvectors of the pair of eigensystems + being combined by the matrix of eigenvectors of the K-by-K system + which is solved here. 
+ + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + K (input) INTEGER + The number of terms in the rational function to be solved by + DLAED4. K >= 0. + + N (input) INTEGER + The number of rows and columns in the Q matrix. + N >= K (deflation may result in N>K). + + N1 (input) INTEGER + The location of the last eigenvalue in the leading submatrix. + min(1,N) <= N1 <= N/2. + + D (output) DOUBLE PRECISION array, dimension (N) + D(I) contains the updated eigenvalues for + 1 <= I <= K. + + Q (output) DOUBLE PRECISION array, dimension (LDQ,N) + Initially the first K columns are used as workspace. + On output the columns 1 to K contain + the updated eigenvectors. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + RHO (input) DOUBLE PRECISION + The value of the parameter in the rank one update equation. + RHO >= 0 required. + + DLAMDA (input/output) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. May be changed on output by + having lowest order bit set to zero on Cray X-MP, Cray Y-MP, + Cray-2, or Cray C-90, as described above. + + Q2 (input) DOUBLE PRECISION array, dimension (LDQ2, N) + The first K columns of this matrix contain the non-deflated + eigenvectors for the split problem. + + INDX (input) INTEGER array, dimension (N) + The permutation used to arrange the columns of the deflated + Q matrix into three groups (see DLAED2). + The rows of the eigenvectors found by DLAED4 must be likewise + permuted before the matrix multiply can take place. 
+ + CTOT (input) INTEGER array, dimension (4) + A count of the total number of the various types of columns + in Q, as described in INDX. The fourth column type is any + column which has been deflated. + + W (input/output) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating vector. Destroyed on + output. + + S (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K + Will contain the eigenvectors of the repaired matrix which + will be multiplied by the previously accumulated eigenvectors + to update the system. + + LDS (input) INTEGER + The leading dimension of S. LDS >= max(1,K). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --q2; + --indx; + --ctot; + --w; + --s; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*n < *k) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED3", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* + Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). 
+ The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), + which on any of these machines zeros out the bottommost + bit of DLAMDA(I) if it is 1; this makes the subsequent + subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DLAMDA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DLAMDA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1) { + goto L110; + } + if (*k == 2) { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + w[1] = q[j * q_dim1 + 1]; + w[2] = q[j * q_dim1 + 2]; + ii = indx[1]; + q[j * q_dim1 + 1] = w[ii]; + ii = indx[2]; + q[j * q_dim1 + 2] = w[ii]; +/* L30: */ + } + goto L110; + } + +/* Compute updated W. 
*/ + + dcopy_(k, &w[1], &c__1, &s[1], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L40: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } +/* L60: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = sqrt(-w[i__]); + w[i__] = d_sign(&d__1, &s[i__]); +/* L70: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__] = w[i__] / q[i__ + j * q_dim1]; +/* L80: */ + } + temp = dnrm2_(k, &s[1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + ii = indx[i__]; + q[i__ + j * q_dim1] = s[ii] / temp; +/* L90: */ + } +/* L100: */ + } + +/* Compute the updated eigenvectors. 
*/ + +L110: + + n2 = *n - *n1; + n12 = ctot[1] + ctot[2]; + n23 = ctot[2] + ctot[3]; + + dlacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); + iq2 = *n1 * n12 + 1; + if (n23 != 0) { + dgemm_("N", "N", &n2, k, &n23, &c_b2453, &q2[iq2], &n2, &s[1], &n23, & + c_b2467, &q[*n1 + 1 + q_dim1], ldq); + } else { + dlaset_("A", &n2, k, &c_b2467, &c_b2467, &q[*n1 + 1 + q_dim1], ldq); + } + + dlacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); + if (n12 != 0) { + dgemm_("N", "N", n1, k, &n12, &c_b2453, &q2[1], n1, &s[1], &n12, & + c_b2467, &q[q_offset], ldq); + } else { + dlaset_("A", n1, k, &c_b2467, &c_b2467, &q[q_dim1 + 1], ldq); + } + + +L120: + return 0; + +/* End of DLAED3 */ + +} /* dlaed3_ */ + +/* Subroutine */ int dlaed4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam, + integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal a, b, c__; + static integer j; + static doublereal w; + static integer ii; + static doublereal dw, zz[3]; + static integer ip1; + static doublereal del, eta, phi, eps, tau, psi; + static integer iim1, iip1; + static doublereal dphi, dpsi; + static integer iter; + static doublereal temp, prew, temp1, dltlb, dltub, midpt; + static integer niter; + static logical swtch; + extern /* Subroutine */ int dlaed5_(integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), dlaed6_(integer *, + logical *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + static logical swtch3; + + static logical orgati; + static doublereal erretm, rhoinv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. 
of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + December 23, 1999 + + + Purpose + ======= + + This subroutine computes the I-th updated eigenvalue of a symmetric + rank-one modification to a diagonal matrix whose elements are + given in the array d, and that + + D(i) < D(j) for i < j + + and that RHO > 0. This is arranged by the calling routine, and is + no loss in generality. The rank-one modified system is thus + + diag( D ) + RHO * Z * Z_transpose. + + where we assume the Euclidean norm of Z is 1. + + The method consists of approximating the rational functions in the + secular equation by simpler interpolating rational functions. + + Arguments + ========= + + N (input) INTEGER + The length of all arrays. + + I (input) INTEGER + The index of the eigenvalue to be computed. 1 <= I <= N. + + D (input) DOUBLE PRECISION array, dimension (N) + The original eigenvalues. It is assumed that they are in + order, D(I) < D(J) for I < J. + + Z (input) DOUBLE PRECISION array, dimension (N) + The components of the updating vector. + + DELTA (output) DOUBLE PRECISION array, dimension (N) + If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th + component. If N = 1, then DELTA(1) = 1. The vector DELTA + contains the information necessary to construct the + eigenvectors. + + RHO (input) DOUBLE PRECISION + The scalar in the symmetric updating formula. + + DLAM (output) DOUBLE PRECISION + The computed lambda_I, the I-th updated eigenvalue. + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, the updating process failed. + + Internal Parameters + =================== + + Logical variable ORGATI (origin-at-i?) is used for distinguishing + whether D(i) or D(i+1) is treated as the origin. + + ORGATI = .true. origin at i + ORGATI = .false. origin at i+1 + + Logical variable SWTCH3 (switch-for-3-poles?) is for noting + if we are working with THREE poles! 
+ + MAXIT is the maximum number of iterations allowed for each + eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Since this routine is called in an inner loop, we do no argument + checking. + + Quick return for N=1 and 2. +*/ + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *dlam = d__[1] + *rho * z__[1] * z__[1]; + delta[1] = 1.; + return 0; + } + if (*n == 2) { + dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); + return 0; + } + +/* Compute machine epsilon */ + + eps = EPSILON; + rhoinv = 1. / *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + midpt = *rho / 2.; + +/* + If ||Z||_2 is not one, then TEMP should be set to + RHO * ||Z||_2^2 / TWO +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L10: */ + } + + psi = 0.; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* + n]; + + if (w <= 0.) { + temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) + + z__[*n] * z__[*n] / *rho; + if (c__ <= temp) { + tau = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] + ; + b = z__[*n] * z__[*n] * del; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. 
* c__)) / (c__ * 2.); + } + } + +/* + It can be proved that + D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO +*/ + + dltlb = midpt; + dltub = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * del; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + +/* + It can be proved that + D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 +*/ + + dltlb = 0.; + dltub = midpt; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( + dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (c__ < 0.) { + c__ = abs(c__); + } + if (c__ == 0.) { +/* + ETA = B/A + ETA = RHO - TAU +*/ + eta = dltub - tau; + } else if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. 
However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L50: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * + (dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.) 
{ + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L70: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + *dlam = d__[*i__] + tau; + goto L250; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + del = d__[ip1] - d__[*i__]; + midpt = del / 2.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L100: */ + } + + psi = 0.; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L110: */ + } + + phi = 0.; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / delta[j]; +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / + delta[ip1]; + + if (w > 0.) { + +/* + d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 + + We choose d(i) as origin. +*/ + + orgati = TRUE_; + a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * del; + if (a > 0.) { + tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } else { + tau = (a - sqrt((d__1 = a * a - b * 4. 
* c__, abs(d__1)))) / ( + c__ * 2.); + } + dltlb = 0.; + dltub = midpt; + } else { + +/* + (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) + + We choose d(i+1) as origin. +*/ + + orgati = FALSE_; + a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * del; + if (a < 0.) { + tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( + d__1)))); + } else { + tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / + (c__ * 2.); + } + dltlb = -midpt; + dltub = 0.; + } + + if (orgati) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[ip1] - tau; +/* L140: */ + } + } + if (orgati) { + ii = *i__; + } else { + ii = *i__ + 1; + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* + W is the value of the secular function with + its ii-th element removed. +*/ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.) { + swtch3 = TRUE_; + } + } else { + if (w > 0.) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. 
+ + abs(tau) * dw; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! swtch3) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 * + d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 * + d__1); + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * + dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.) { + if (a == 0.) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * + (dpsi + dphi); + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ + iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[ + iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); + if (*info != 0) { + goto L250; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. 
However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) { + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + + prew = w; + +/* L170: */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L180: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L190: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L200: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + ( + d__1 = tau + eta, abs(d__1)) * dw; + + swtch = FALSE_; + if (orgati) { + if (-w > abs(prew) / 10.) { + swtch = TRUE_; + } + } else { + if (w > abs(prew) / 10.) { + swtch = TRUE_; + } + } + + tau += eta; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.) { + dltlb = max(dltlb,tau); + } else { + dltub = min(dltub,tau); + } + +/* Calculate the new step */ + + if (! swtch3) { + if (! 
swtch) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( + d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * + (d__1 * d__1); + } + } else { + temp = z__[ii] / delta[ii]; + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] + * dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.) { + if (a == 0.) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * + delta[ip1] * (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ + *i__] * (dpsi + dphi); + } + } else { + a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] + * delta[ip1] * dphi; + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) + / (c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. 
* c__, + abs(d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; + zz[0] = delta[iim1] * delta[iim1] * dpsi; + zz[2] = delta[iip1] * delta[iip1] * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] + - d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] + - d__[iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - + temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + } + dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, + info); + if (*info != 0) { + goto L250; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.) 
{ + eta = (dltub - tau) / 2.; + } else { + eta = (dltlb - tau) / 2.; + } + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L210: */ + } + + tau += eta; + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L220: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L230: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + if (w * prew > 0. && abs(w) > abs(prew) / 10.) { + swtch = ! swtch; + } + +/* L240: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + + } + +L250: + + return 0; + +/* End of DLAED4 */ + +} /* dlaed4_ */ + +/* Subroutine */ int dlaed5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dlam) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal b, c__, w, del, tau, temp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + This subroutine computes the I-th eigenvalue of a symmetric rank-one + modification of a 2-by-2 diagonal matrix + + diag( D ) + RHO * Z * transpose(Z) . + + The diagonal elements in the array D are assumed to satisfy + + D(i) < D(j) for i < j . 
+ + We also assume RHO > 0 and that the Euclidean norm of the vector + Z is one. + + Arguments + ========= + + I (input) INTEGER + The index of the eigenvalue to be computed. I = 1 or I = 2. + + D (input) DOUBLE PRECISION array, dimension (2) + The original eigenvalues. We assume D(1) < D(2). + + Z (input) DOUBLE PRECISION array, dimension (2) + The components of the updating vector. + + DELTA (output) DOUBLE PRECISION array, dimension (2) + The vector DELTA contains the information necessary + to construct the eigenvectors. + + RHO (input) DOUBLE PRECISION + The scalar in the symmetric updating formula. + + DLAM (output) DOUBLE PRECISION + The computed lambda_I, the I-th updated eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + if (*i__ == 1) { + w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.; + if (w > 0.) { + b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * del; + +/* B > ZERO, always */ + + tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); + *dlam = d__[1] + tau; + delta[1] = -z__[1] / tau; + delta[2] = z__[2] / (del - tau); + } else { + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.) { + tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); + } else { + tau = (b - sqrt(b * b + c__ * 4.)) / 2.; + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + } + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } else { + +/* Now I=2 */ + + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.) 
{ + tau = (b + sqrt(b * b + c__ * 4.)) / 2.; + } else { + tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } + return 0; + +/* End OF DLAED5 */ + +} /* dlaed5_ */ + +/* Subroutine */ int dlaed6_(integer *kniter, logical *orgati, doublereal * + rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * + tau, integer *info) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *); + + /* Local variables */ + static doublereal a, b, c__, f; + static integer i__; + static doublereal fc, df, ddf, eta, eps, base; + static integer iter; + static doublereal temp, temp1, temp2, temp3, temp4; + static logical scale; + static integer niter; + static doublereal small1, small2, sminv1, sminv2; + + static doublereal dscale[3], sclfac, zscale[3], erretm, sclinv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + DLAED6 computes the positive or negative root (closest to the origin) + of + z(1) z(2) z(3) + f(x) = rho + --------- + ---------- + --------- + d(1)-x d(2)-x d(3)-x + + It is assumed that + + if ORGATI = .true. the root is between d(2) and d(3); + otherwise it is between d(1) and d(2) + + This routine will be called by DLAED4 when necessary. In most cases, + the root sought is the smallest in magnitude, though it might not be + in some extremely rare situations. + + Arguments + ========= + + KNITER (input) INTEGER + Refer to DLAED4 for its significance. 
+ + ORGATI (input) LOGICAL + If ORGATI is true, the needed root is between d(2) and + d(3); otherwise it is between d(1) and d(2). See + DLAED4 for further details. + + RHO (input) DOUBLE PRECISION + Refer to the equation f(x) above. + + D (input) DOUBLE PRECISION array, dimension (3) + D satisfies d(1) < d(2) < d(3). + + Z (input) DOUBLE PRECISION array, dimension (3) + Each of the elements in z must be positive. + + FINIT (input) DOUBLE PRECISION + The value of f at 0. It is more accurate than the one + evaluated inside this routine (if someone wants to do + so). + + TAU (output) DOUBLE PRECISION + The root of the equation f(x). + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, failure to converge + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + /* Parameter adjustments */ + --z__; + --d__; + + /* Function Body */ + + *info = 0; + + niter = 1; + *tau = 0.; + if (*kniter == 2) { + if (*orgati) { + temp = (d__[3] - d__[2]) / 2.; + c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); + a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; + b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; + } else { + temp = (d__[1] - d__[2]) / 2.; + c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); + a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; + b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; + } +/* Computing MAX */ + d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); + temp = max(d__1,d__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.) { + *tau = b / a; + } else if (a <= 0.) { + *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. 
* c__, abs(d__1)) + )); + } + temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) + + z__[3] / (d__[3] - *tau); + if (abs(*finit) <= abs(temp)) { + *tau = 0.; + } + } + +/* + On first call to routine, get machine parameters for + possible scaling to avoid overflow +*/ + + if (first) { + eps = EPSILON; + base = BASE; + i__1 = (integer) (log(SAFEMINIMUM) / log(base) / 3.); + small1 = pow_di(&base, &i__1); + sminv1 = 1. / small1; + small2 = small1 * small1; + sminv2 = sminv1 * sminv1; + first = FALSE_; + } + +/* + Determine if scaling of inputs necessary to avoid overflow + when computing 1/TEMP**3 +*/ + + if (*orgati) { +/* Computing MIN */ + d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - * + tau, abs(d__2)); + temp = min(d__3,d__4); + } else { +/* Computing MIN */ + d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - * + tau, abs(d__2)); + temp = min(d__3,d__4); + } + scale = FALSE_; + if (temp <= small1) { + scale = TRUE_; + if (temp <= small2) { + +/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ + + sclfac = sminv2; + sclinv = small2; + } else { + +/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ + + sclfac = sminv1; + sclinv = small1; + } + +/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__] * sclfac; + zscale[i__ - 1] = z__[i__] * sclfac; +/* L10: */ + } + *tau *= sclfac; + } else { + +/* Copy D and Z to DSCALE and ZSCALE */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__]; + zscale[i__ - 1] = z__[i__]; +/* L20: */ + } + } + + fc = 0.; + df = 0.; + ddf = 0.; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1. / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + fc += temp1 / dscale[i__ - 1]; + df += temp2; + ddf += temp3; +/* L30: */ + } + f = *finit + *tau * fc; + + if (abs(f) <= 0.) 
{ + goto L60; + } + +/* + Iteration begins + + It is not hard to see that + + 1) Iterations will go up monotonically + if FINIT < 0; + + 2) Iterations will go down monotonically + if FINIT > 0. +*/ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + + if (*orgati) { + temp1 = dscale[1] - *tau; + temp2 = dscale[2] - *tau; + } else { + temp1 = dscale[0] - *tau; + temp2 = dscale[1] - *tau; + } + a = (temp1 + temp2) * f - temp1 * temp2 * df; + b = temp1 * temp2 * f; + c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; +/* Computing MAX */ + d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__); + temp = max(d__1,d__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.) { + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + if (f * eta >= 0.) { + eta = -f / df; + } + + temp = eta + *tau; + if (*orgati) { + if (eta > 0. && temp >= dscale[2]) { + eta = (dscale[2] - *tau) / 2.; + } + if (eta < 0. && temp <= dscale[1]) { + eta = (dscale[1] - *tau) / 2.; + } + } else { + if (eta > 0. && temp >= dscale[1]) { + eta = (dscale[1] - *tau) / 2.; + } + if (eta < 0. && temp <= dscale[0]) { + eta = (dscale[0] - *tau) / 2.; + } + } + *tau += eta; + + fc = 0.; + erretm = 0.; + df = 0.; + ddf = 0.; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1. / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + temp4 = temp1 / dscale[i__ - 1]; + fc += temp4; + erretm += abs(temp4); + df += temp2; + ddf += temp3; +/* L40: */ + } + f = *finit + *tau * fc; + erretm = (abs(*finit) + abs(*tau) * erretm) * 8. 
+ abs(*tau) * df; + if (abs(f) <= eps * erretm) { + goto L60; + } +/* L50: */ + } + *info = 1; +L60: + +/* Undo scaling */ + + if (scale) { + *tau *= sclinv; + } + return 0; + +/* End of DLAED6 */ + +} /* dlaed6_ */ + +/* Subroutine */ int dlaed7_(integer *icompq, integer *n, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__, + doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer + *cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer * + perm, integer *givptr, integer *givcol, doublereal *givnum, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer indxc, indxp; + extern /* Subroutine */ int dlaed8_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *, integer *, integer *), dlaed9_(integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *), dlaeda_(integer *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, doublereal *, integer *, doublereal *, doublereal *, integer *) + ; + static integer idlmda; + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. 
of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DLAED7 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and optionally eigenvectors of a dense symmetric matrix + that has been reduced to tridiagonal form. DLAED1 handles + the case in which all eigenvalues and eigenvectors of a symmetric + tridiagonal matrix are desired. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLAED8. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine DLAED4 (as called by DLAED9). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. 
+ + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= CURLVL <= TLVLS. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (output) INTEGER array, dimension (N) + The permutation which will reintegrate the subproblem just + solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) + will be in ascending order. + + RHO (input) DOUBLE PRECISION + The subdiagonal element used to create the rank-1 + modification. + + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1) + Stores eigenvectors of submatrices encountered during + divide and conquer, packed together. QPTR points to + beginning of the submatrices. + + QPTR (input/output) INTEGER array, dimension (N+2) + List of indices pointing to beginning of submatrices stored + in QSTORE. The submatrices are numbered starting at the + bottom left of the divide and conquer tree, from left to + right and bottom to top. + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. 
PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and also the size of + the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + WORK (workspace) DOUBLE PRECISION array, dimension (3*N+QSIZ*N) + + IWORK (workspace) INTEGER array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -9; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED7", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in DLAED8 and DLAED9. +*/ + + if (*icompq == 1) { + ldq2 = *qsiz; + } else { + ldq2 = *n; + } + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + is = iq2 + *n * ldq2; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ + } + curr = ptr + *curpbm; + dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz + + *n], info); + +/* + When solving the final problem, we no longer need the stored data, + so we will overwrite the data from this level onto the previously + used storage space. +*/ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; + } + +/* Sort and Deflate eigenvalues. 
*/ + + dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, + cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & + perm[prmptr[curr]], &givptr[curr + 1], &givcol[(givptr[curr] << 1) + + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp], &iwork[ + indx], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; + +/* Solve Secular Equation. */ + + if (k != 0) { + dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], + &work[iw], &qstore[qptr[curr]], &k, info); + if (*info != 0) { + goto L30; + } + if (*icompq == 1) { + dgemm_("N", "N", qsiz, &k, &k, &c_b2453, &work[iq2], &ldq2, & + qstore[qptr[curr]], &k, &c_b2467, &q[q_offset], ldq); + } +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + +/* Prepare the INDXQ sorting permutation. */ + + n1 = k; + n2 = *n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } + } + +L30: + return 0; + +/* End of DLAED7 */ + +} /* dlaed7_ */ + +/* Subroutine */ int dlaed8_(integer *icompq, integer *k, integer *n, integer + *qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq, + doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda, + doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer + *givptr, integer *givcol, doublereal *givnum, integer *indxp, integer + *indx, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal c__; + static integer i__, j; + static doublereal s, t; + static integer k2, n1, n2, jp, n1p1; + static doublereal eps, tau, tol; + static integer jlam, imax, jmax; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, 
doublereal *, doublereal *), dscal_( + integer *, doublereal *, doublereal *, integer *), dcopy_(integer + *, doublereal *, integer *, doublereal *, integer *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + DLAED8 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny element in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + + K (output) INTEGER + The number of non-deflated eigenvalues, and the order of the + related secular equation. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the eigenvalues of the two submatrices to be + combined. On exit, the trailing (N-K) updated eigenvalues + (those which were deflated) sorted into increasing order. + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) + If ICOMPQ = 0, Q is not referenced. 
Otherwise, + on entry, Q contains the eigenvectors of the partially solved + system which has been previously updated in matrix + multiplies with other partially solved eigensystems. + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input) INTEGER array, dimension (N) + The permutation which separately sorts the two sub-problems + in D into ascending order. Note that elements in the second + half of this permutation must first have CUTPNT added to + their values in order to be accurate. + + RHO (input/output) DOUBLE PRECISION + On entry, the off-diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. + On exit, RHO has been modified to the value required by + DLAED3. + + CUTPNT (input) INTEGER + The location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + Z (input) DOUBLE PRECISION array, dimension (N) + On entry, Z contains the updating vector (the last row of + the first sub-eigenvector matrix and the first row of the + second sub-eigenvector matrix). + On exit, the contents of Z are destroyed by the updating + process. + + DLAMDA (output) DOUBLE PRECISION array, dimension (N) + A copy of the first K eigenvalues which will be used by + DLAED3 to form the secular equation. + + Q2 (output) DOUBLE PRECISION array, dimension (LDQ2,N) + If ICOMPQ = 0, Q2 is not referenced. Otherwise, + a copy of the first K eigenvectors which will be used by + DLAED7 in a matrix multiply (DGEMM) to update the new + eigenvectors. + + LDQ2 (input) INTEGER + The leading dimension of the array Q2. LDQ2 >= max(1,N). + + W (output) DOUBLE PRECISION array, dimension (N) + The first k values of the final deflation-altered z-vector and + will be passed to DLAED3. 
+ + PERM (output) INTEGER array, dimension (N) + The permutations (from deflation and sorting) to be applied + to each eigenblock. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. + + GIVCOL (output) INTEGER array, dimension (2, N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (output) DOUBLE PRECISION array, dimension (2, N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INDXP (workspace) INTEGER array, dimension (N) + The permutation used to place deflated values of D at the end + of the array. INDXP(1:K) points to the nondeflated D-values + and INDXP(K+1:N) points to the deflated eigenvalues. + + INDX (workspace) INTEGER array, dimension (N) + The permutation used to sort the contents of D into ascending + order. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --perm; + givcol -= 3; + givnum -= 3; + --indxp; + --indx; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -3; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -10; + } else if (*ldq2 < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED8", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; + + if (*rho < 0.) { + dscal_(&n2, &c_b2589, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1 */ + + t = 1. / sqrt(2.); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + dscal_(n, &t, &z__[1], &c__1); + *rho = (d__1 = *rho * 2., abs(d__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; +/* L20: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } + +/* Calculate the allowable deflation tolerence */ + + imax = idamax_(n, &z__[1], &c__1); + jmax = idamax_(n, &d__[1], &c__1); + eps = EPSILON; + tol = eps * 8. * (d__1 = d__[jmax], abs(d__1)); + +/* + If the rank-1 modifier is small enough, no more needs to be done + except to reorganize Q so that its columns correspond with the + elements in D. 
+*/ + + if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) { + *k = 0; + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + + 1], &c__1); +/* L60: */ + } + dlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + } + return 0; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + *k = 0; + *givptr = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L110; + } + } else { + jlam = j; + goto L80; + } +/* L70: */ + } +L80: + ++j; + if (j > *n) { + goto L100; + } + if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((d__1 = t * c__ * s, abs(d__1)) <= tol) { + +/* Deflation is possible. 
*/ + + z__[j] = tau; + z__[jlam] = 0.; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + if (*icompq == 1) { + drot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ + indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + } + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L110: + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; +/* L120: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); +/* L130: */ + } + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. 
+*/ + + if (*k < *n) { + if (*icompq == 0) { + i__1 = *n - *k; + dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + } else { + i__1 = *n - *k; + dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + dlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* + k + 1) * q_dim1 + 1], ldq); + } + } + + return 0; + +/* End of DLAED8 */ + +} /* dlaed8_ */ + +/* Subroutine */ int dlaed9_(integer *k, integer *kstart, integer *kstop, + integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * + rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer i__, j; + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlaed4_(integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + DLAED9 finds the roots of the secular equation, as defined by the + values in D, Z, and RHO, between KSTART and KSTOP. It makes the + appropriate calls to DLAED4 and then stores the new matrix of + eigenvectors for use in calculating the next level of Z vectors. + + Arguments + ========= + + K (input) INTEGER + The number of terms in the rational function to be solved by + DLAED4. K >= 0. 
+ + KSTART (input) INTEGER + KSTOP (input) INTEGER + The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP + are to be computed. 1 <= KSTART <= KSTOP <= K. + + N (input) INTEGER + The number of rows and columns in the Q matrix. + N >= K (delation may result in N > K). + + D (output) DOUBLE PRECISION array, dimension (N) + D(I) contains the updated eigenvalues + for KSTART <= I <= KSTOP. + + Q (workspace) DOUBLE PRECISION array, dimension (LDQ,N) + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max( 1, N ). + + RHO (input) DOUBLE PRECISION + The value of the parameter in the rank one update equation. + RHO >= 0 required. + + DLAMDA (input) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. + + W (input) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating vector. + + S (output) DOUBLE PRECISION array, dimension (LDS, K) + Will contain the eigenvectors of the repaired matrix which + will be stored for subsequent Z vector calculation and + multiplied by the previously accumulated eigenvectors + to update the system. + + LDS (input) INTEGER + The leading dimension of S. LDS >= max( 1, K ). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --w; + s_dim1 = *lds; + s_offset = 1 + s_dim1; + s -= s_offset; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*kstart < 1 || *kstart > max(1,*k)) { + *info = -2; + } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { + *info = -3; + } else if (*n < *k) { + *info = -4; + } else if (*ldq < max(1,*k)) { + *info = -7; + } else if (*lds < max(1,*k)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAED9", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* + Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), + which on any of these machines zeros out the bottommost + bit of DLAMDA(I) if it is 1; this makes the subsequent + subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DLAMDA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DLAMDA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *kstop; + for (j = *kstart; j <= i__1; ++j) { + dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. 
*/ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1 || *k == 2) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *k; + for (j = 1; j <= i__2; ++j) { + s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; +/* L30: */ + } +/* L40: */ + } + goto L120; + } + +/* Compute updated W. */ + + dcopy_(k, &w[1], &c__1, &s[s_offset], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L60: */ + } +/* L70: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__1 = sqrt(-w[i__]); + w[i__] = d_sign(&d__1, &s[i__ + s_dim1]); +/* L80: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; +/* L90: */ + } + temp = dnrm2_(k, &q[j * q_dim1 + 1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; +/* L100: */ + } +/* L110: */ + } + +L120: + return 0; + +/* End of DLAED9 */ + +} /* dlaed9_ */ + +/* Subroutine */ int dlaeda_(integer *n, integer *tlvls, integer *curlvl, + integer *curpbm, integer *prmptr, integer *perm, integer *givptr, + integer *givcol, doublereal *givnum, doublereal *q, integer *qptr, + doublereal *z__, doublereal *ztemp, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + static integer i__, k, mid, ptr; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, 
doublereal *); + static integer curr, bsiz1, bsiz2, psiz1, psiz2, zptr1; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *), xerbla_(char *, + integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DLAEDA computes the Z vector corresponding to the merge step in the + CURLVLth step of the merge process with TLVLS steps for the CURPBMth + problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= curlvl <= tlvls. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and incidentally the + size of the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. 
+ + GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + Q (input) DOUBLE PRECISION array, dimension (N**2) + Contains the square eigenblocks from previous levels, the + starting positions for blocks are given by QPTR. + + QPTR (input) INTEGER array, dimension (N+2) + Contains a list of pointers which indicate where in Q an + eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates + the size of the block. + + Z (output) DOUBLE PRECISION array, dimension (N) + On output this vector contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). + + ZTEMP (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --ztemp; + --z__; + --qptr; + --q; + givnum -= 3; + givcol -= 3; + --givptr; + --perm; + --prmptr; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAEDA", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine location of first number in second half. */ + + mid = *n / 2 + 1; + +/* Gather last/first rows of appropriate eigenblocks into center of Z */ + + ptr = 1; + +/* + Determine location of lowest level subproblem in the full storage + scheme +*/ + + i__1 = *curlvl - 1; + curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; + +/* + Determine size of these matrices. 
We add HALF to the value of + the SQRT in case the machine underestimates one of these square + roots. +*/ + + bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + .5); + bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])) + + .5); + i__1 = mid - bsiz1 - 1; + for (k = 1; k <= i__1; ++k) { + z__[k] = 0.; +/* L10: */ + } + dcopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & + c__1); + dcopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); + i__1 = *n; + for (k = mid + bsiz2; k <= i__1; ++k) { + z__[k] = 0.; +/* L20: */ + } + +/* + Loop thru remaining levels 1 -> CURLVL applying the Givens + rotations and permutation and then multiplying the center matrices + against the current Z. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = *curlvl - k; + i__3 = *curlvl - k - 1; + curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - + 1; + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + zptr1 = mid - psiz1; + +/* Apply Givens at CURR and CURR+1 */ + + i__2 = givptr[curr + 1] - 1; + for (i__ = givptr[curr]; i__ <= i__2; ++i__) { + drot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & + z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( + i__ << 1) + 1], &givnum[(i__ << 1) + 2]); +/* L30: */ + } + i__2 = givptr[curr + 2] - 1; + for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { + drot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ + mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << + 1) + 1], &givnum[(i__ << 1) + 2]); +/* L40: */ + } + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + i__2 = psiz1 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; +/* L50: */ + } + i__2 = psiz2 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] 
+ i__] - + 1]; +/* L60: */ + } + +/* + Multiply Blocks at CURR and CURR+1 + + Determine size of these matrices. We add HALF to the value of + the SQRT in case the machine underestimates one of these + square roots. +*/ + + bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + + .5); + bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1]) + ) + .5); + if (bsiz1 > 0) { + dgemv_("T", &bsiz1, &bsiz1, &c_b2453, &q[qptr[curr]], &bsiz1, & + ztemp[1], &c__1, &c_b2467, &z__[zptr1], &c__1); + } + i__2 = psiz1 - bsiz1; + dcopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); + if (bsiz2 > 0) { + dgemv_("T", &bsiz2, &bsiz2, &c_b2453, &q[qptr[curr + 1]], &bsiz2, + &ztemp[psiz1 + 1], &c__1, &c_b2467, &z__[mid], &c__1); + } + i__2 = psiz2 - bsiz2; + dcopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & + c__1); + + i__2 = *tlvls - k; + ptr += pow_ii(&c__2, &i__2); +/* L70: */ + } + + return 0; + +/* End of DLAEDA */ + +} /* dlaeda_ */ + +/* Subroutine */ int dlaev2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal ab, df, cs, ct, tb, sm, tn, rt, adf, acs; + static integer sgn1, sgn2; + static doublereal acmn, acmx; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix + [ A B ] + [ B C ]. 
+ On return, RT1 is the eigenvalue of larger absolute value, RT2 is the + eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right + eigenvector for RT1, giving the decomposition + + [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] + [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. + + Arguments + ========= + + A (input) DOUBLE PRECISION + The (1,1) element of the 2-by-2 matrix. + + B (input) DOUBLE PRECISION + The (1,2) element and the conjugate of the (2,1) element of + the 2-by-2 matrix. + + C (input) DOUBLE PRECISION + The (2,2) element of the 2-by-2 matrix. + + RT1 (output) DOUBLE PRECISION + The eigenvalue of larger absolute value. + + RT2 (output) DOUBLE PRECISION + The eigenvalue of smaller absolute value. + + CS1 (output) DOUBLE PRECISION + SN1 (output) DOUBLE PRECISION + The vector (CS1, SN1) is a unit right eigenvector for RT1. + + Further Details + =============== + + RT1 is accurate to a few ulps barring over/underflow. + + RT2 may be inaccurate if there is massive cancellation in the + determinant A*C-B*B; higher precision or correctly rounded or + correctly truncated arithmetic would be needed to compute RT2 + accurately in all cases. + + CS1 and SN1 are accurate to a few ulps barring over/underflow. + + Overflow is possible only if RT1 is within a factor of 5 of overflow. + Underflow is harmless if the input data is 0 or exceeds + underflow_threshold / macheps. + + ===================================================================== + + + Compute the eigenvalues +*/ + + sm = *a + *c__; + df = *a - *c__; + adf = abs(df); + tb = *b + *b; + ab = abs(tb); + if (abs(*a) > abs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + d__1 = ab / adf; + rt = adf * sqrt(d__1 * d__1 + 1.); + } else if (adf < ab) { +/* Computing 2nd power */ + d__1 = adf / ab; + rt = ab * sqrt(d__1 * d__1 + 1.); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.); + } + if (sm < 0.) 
{ + *rt1 = (sm - rt) * .5; + sgn1 = -1; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.) { + *rt1 = (sm + rt) * .5; + sgn1 = 1; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5; + *rt2 = rt * -.5; + sgn1 = 1; + } + +/* Compute the eigenvector */ + + if (df >= 0.) { + cs = df + rt; + sgn2 = 1; + } else { + cs = df - rt; + sgn2 = -1; + } + acs = abs(cs); + if (acs > ab) { + ct = -tb / cs; + *sn1 = 1. / sqrt(ct * ct + 1.); + *cs1 = ct * *sn1; + } else { + if (ab == 0.) { + *cs1 = 1.; + *sn1 = 0.; + } else { + tn = -cs / tb; + *cs1 = 1. / sqrt(tn * tn + 1.); + *sn1 = tn * *cs1; + } + } + if (sgn1 == sgn2) { + tn = *cs1; + *cs1 = -(*sn1); + *sn1 = tn; + } + return 0; + +/* End of DLAEV2 */ + +} /* dlaev2_ */ + +/* Subroutine */ int dlahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer i__, j, k, l, m; + static doublereal s, v[3]; + static integer i1, i2; + static doublereal t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, + h33, h44; + static integer nh; + static doublereal cs; + static integer nr; + static doublereal sn; + static integer nz; + static doublereal ave, h33s, h44s; + static integer itn, its; + static doublereal ulp, sum, tst1, h43h34, disc, unfl, ovfl; + extern /* 
Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static doublereal work[1]; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), dlabad_( + doublereal *, doublereal *); + + extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *); + extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, + doublereal *); + static doublereal smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLAHQR is an auxiliary routine called by DHSEQR to update the + eigenvalues and Schur decomposition already computed by DHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to IHI. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper quasi-triangular in + rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless + ILO = 1). DLAHQR works primarily with the Hessenberg + submatrix in rows and columns ILO to IHI, but applies + transformations to all of H if WANTT is .TRUE.. + 1 <= ILO <= max(1,IHI); IHI <= N. + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. 
+ On exit, if WANTT is .TRUE., H is upper quasi-triangular in + rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in + standard form. If WANTT is .FALSE., the contents of H are + unspecified on exit. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues ILO to IHI are stored in the corresponding + elements of WR and WI. If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the + eigenvalues are stored in the same order as on the diagonal + of the Schur form returned in H, with WR(i) = H(i,i), and, if + H(i:i+1,i:i+1) is a 2-by-2 diagonal block, + WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + If WANTZ is .TRUE., on entry Z must contain the current + matrix Z of transformations accumulated by DHSEQR, and on + exit Z has been updated; transformations are applied only to + the submatrix Z(ILOZ:IHIZ,ILO:IHI). + If WANTZ is .FALSE., Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + > 0: DLAHQR failed to compute all the eigenvalues ILO to IHI + in a total of 30*(IHI-ILO+1) iterations; if INFO = i, + elements i+1:ihi of WR and WI contain those eigenvalues + which have been successfully computed. 
+ + Further Details + =============== + + 2-96 Based on modifications by + David Day, Sandia National Laboratory, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; + } + + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + +/* + Set machine-dependent constants for the stopping criterion. + If norm(H) <= sqrt(OVFL), overflow should not occur. +*/ + + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; + smlnum = unfl * (nh / ulp); + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (*wantt) { + i1 = 1; + i2 = *n; + } + +/* ITN is the total number of QR iterations allowed. */ + + itn = nh * 30; + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of 1 or 2. Each iteration of the loop works + with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L10: + l = *ilo; + if (i__ < *ilo) { + goto L150; + } + +/* + Perform QR iterations on rows and columns ILO to I until a + submatrix of order 1 or 2 splits off at the bottom because a + subdiagonal element has become negligible. +*/ + + i__1 = itn; + for (its = 0; its <= i__1; ++its) { + +/* Look for a single small subdiagonal element. 
*/ + + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[k + k * h_dim1], abs(d__2)); + if (tst1 == 0.) { + i__3 = i__ - l + 1; + tst1 = dlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work); + } +/* Computing MAX */ + d__2 = ulp * tst1; + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, + smlnum)) { + goto L30; + } +/* L20: */ + } +L30: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible */ + + h__[l + (l - 1) * h_dim1] = 0.; + } + +/* Exit from loop if a submatrix of order 1 or 2 has split off. */ + + if (l >= i__ - 1) { + goto L140; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! (*wantt)) { + i1 = l; + i2 = i__; + } + + if (its == 10 || its == 20) { + +/* Exceptional shift. */ + + s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); + h44 = s * .75 + h__[i__ + i__ * h_dim1]; + h33 = h44; + h43h34 = s * -.4375 * s; + } else { + +/* + Prepare to use Francis' double shift + (i.e. 2nd degree generalized Rayleigh quotient) +*/ + + h44 = h__[i__ + i__ * h_dim1]; + h33 = h__[i__ - 1 + (i__ - 1) * h_dim1]; + h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * + h_dim1]; + s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) * + h_dim1]; + disc = (h33 - h44) * .5; + disc = disc * disc + h43h34; + if (disc > 0.) { + +/* Real roots: use Wilkinson's shift twice */ + + disc = sqrt(disc); + ave = (h33 + h44) * .5; + if (abs(h33) - abs(h44) > 0.) { + h33 = h33 * h44 - h43h34; + h44 = h33 / (d_sign(&disc, &ave) + ave); + } else { + h44 = d_sign(&disc, &ave) + ave; + } + h33 = h44; + h43h34 = 0.; + } + } + +/* Look for two consecutive small subdiagonal elements. 
*/ + + i__2 = l; + for (m = i__ - 2; m >= i__2; --m) { +/* + Determine the effect of starting the double-shift QR + iteration at row M, and see if this would make H(M,M-1) + negligible. +*/ + + h11 = h__[m + m * h_dim1]; + h22 = h__[m + 1 + (m + 1) * h_dim1]; + h21 = h__[m + 1 + m * h_dim1]; + h12 = h__[m + (m + 1) * h_dim1]; + h44s = h44 - h11; + h33s = h33 - h11; + v1 = (h33s * h44s - h43h34) / h21 + h12; + v2 = h22 - h11 - h33s - h44s; + v3 = h__[m + 2 + (m + 1) * h_dim1]; + s = abs(v1) + abs(v2) + abs(v3); + v1 /= s; + v2 /= s; + v3 /= s; + v[0] = v1; + v[1] = v2; + v[2] = v3; + if (m == l) { + goto L50; + } + h00 = h__[m - 1 + (m - 1) * h_dim1]; + h10 = h__[m + (m - 1) * h_dim1]; + tst1 = abs(v1) * (abs(h00) + abs(h11) + abs(h22)); + if (abs(h10) * (abs(v2) + abs(v3)) <= ulp * tst1) { + goto L50; + } +/* L40: */ + } +L50: + +/* Double-shift QR step */ + + i__2 = i__ - 1; + for (k = m; k <= i__2; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. + + Computing MIN +*/ + i__3 = 3, i__4 = i__ - k + 1; + nr = min(i__3,i__4); + if (k > m) { + dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + dlarfg_(&nr, v, &v[1], &c__1, &t1); + if (k > m) { + h__[k + (k - 1) * h_dim1] = v[0]; + h__[k + 1 + (k - 1) * h_dim1] = 0.; + if (k < i__ - 1) { + h__[k + 2 + (k - 1) * h_dim1] = 0.; + } + } else if (m > l) { + h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1]; + } + v2 = v[1]; + t2 = t1 * v2; + if (nr == 3) { + v3 = v[2]; + t3 = t1 * v3; + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. 
+*/ + + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + + v3 * h__[k + 2 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + h__[k + 2 + j * h_dim1] -= sum * t3; +/* L60: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). + + Computing MIN +*/ + i__4 = k + 3; + i__3 = min(i__4,i__); + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + + v3 * h__[j + (k + 2) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + h__[j + (k + 2) * h_dim1] -= sum * t3; +/* L70: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + z__[j + (k + 2) * z_dim1] -= sum * t3; +/* L80: */ + } + } + } else if (nr == 2) { + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; +/* L90: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). 
+*/ + + i__3 = i__; + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + ; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; +/* L100: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; +/* L110: */ + } + } + } +/* L120: */ + } + +/* L130: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L140: + + if (l == i__) { + +/* H(I,I-1) is negligible: one eigenvalue has converged. */ + + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; + } else if (l == i__ - 1) { + +/* + H(I-1,I-2) is negligible: a pair of eigenvalues have converged. + + Transform the 2-by-2 submatrix to standard Schur form, + and compute and store the eigenvalues. +*/ + + dlanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * + h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * + h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, + &sn); + + if (*wantt) { + +/* Apply the transformation to the rest of H. */ + + if (i2 > i__) { + i__1 = i2 - i__; + drot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ + i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); + } + i__1 = i__ - i1 - 1; + drot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * + h_dim1], &c__1, &cs, &sn); + } + if (*wantz) { + +/* Apply the transformation to Z. */ + + drot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + + i__ * z_dim1], &c__1, &cs, &sn); + } + } + +/* + Decrement number of remaining iterations, and return to start of + the main loop with new value of I. 
+*/ + + itn -= its; + i__ = l - 1; + goto L10; + +L150: + return 0; + +/* End of DLAHQR */ + +} /* dlahqr_ */ + +/* Subroutine */ int dlahrd_(integer *n, integer *k, integer *nb, doublereal * + a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, + doublereal *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + doublereal d__1; + + /* Local variables */ + static integer i__; + static doublereal ei; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dgemv_(char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dcopy_(integer *, doublereal *, + integer *, doublereal *, integer *), daxpy_(integer *, doublereal + *, doublereal *, integer *, doublereal *, integer *), dtrmv_(char + *, char *, char *, integer *, doublereal *, integer *, doublereal + *, integer *), dlarfg_(integer *, + doublereal *, doublereal *, integer *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLAHRD reduces the first NB columns of a real general n-by-(n-k+1) + matrix A so that elements below the k-th subdiagonal are zero. The + reduction is performed by an orthogonal similarity transformation + Q' * A * Q. The routine returns the matrices V and T which determine + Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. + + This is an auxiliary routine called by DGEHRD. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. + + K (input) INTEGER + The offset for the reduction. Elements below the k-th + subdiagonal in the first NB columns are reduced to zero. + + NB (input) INTEGER + The number of columns to be reduced. 
+ + A (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1) + On entry, the n-by-(n-k+1) general matrix A. + On exit, the elements on and above the k-th subdiagonal in + the first NB columns are overwritten with the corresponding + elements of the reduced matrix; the elements below the k-th + subdiagonal, with the array TAU, represent the matrix Q as a + product of elementary reflectors. The other columns of A are + unchanged. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) DOUBLE PRECISION array, dimension (NB) + The scalar factors of the elementary reflectors. See Further + Details. + + T (output) DOUBLE PRECISION array, dimension (LDT,NB) + The upper triangular matrix T. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= NB. + + Y (output) DOUBLE PRECISION array, dimension (LDY,NB) + The n-by-nb matrix Y. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= N. + + Further Details + =============== + + The matrix Q is represented as a product of nb elementary reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in + A(i+k+1:n,i), and tau in TAU(i). + + The elements of the vectors v together form the (n-k+1)-by-nb matrix + V which is needed, with T and Y, to apply the transformation to the + unreduced part of the matrix, using an update of the form: + A := (I - V*T*V') * (A - Y*V'). + + The contents of A on exit are illustrated by the following example + with n = 7, k = 3 and nb = 2: + + ( a h a a a ) + ( a h a a a ) + ( a h a a a ) + ( h h a a a ) + ( v1 h a a a ) + ( v1 v2 a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). 
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*n <= 1) { + return 0; + } + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + +/* + Update A(1:n,i) + + Compute i-th column of A - Y * V' +*/ + + i__2 = i__ - 1; + dgemv_("No transpose", n, &i__2, &c_b2589, &y[y_offset], ldy, &a[* + k + i__ - 1 + a_dim1], lda, &c_b2453, &a[i__ * a_dim1 + 1] + , &c__1); + +/* + Apply I - V * T' * V' to this column (call it b) from the + left, using the last column of T as workspace + + Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) + ( V2 ) ( b2 ) + + where V1 is unit lower triangular + + w := V1' * b1 +*/ + + i__2 = i__ - 1; + dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + dtrmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1); + +/* w := w + V2'*b2 */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[*k + i__ + a_dim1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2453, &t[* + nb * t_dim1 + 1], &c__1); + +/* w := T'*w */ + + i__2 = i__ - 1; + dtrmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[*nb * t_dim1 + 1], &c__1); + +/* b2 := b2 - V2*w */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[*k + i__ + + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b2453, &a[* + k + i__ + i__ * a_dim1], &c__1); + +/* b1 := b1 - V1*w */ + + i__2 = i__ - 1; + dtrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] + , lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + daxpy_(&i__2, &c_b2589, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + + i__ * 
a_dim1], &c__1); + + a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; + } + +/* + Generate the elementary reflector H(i) to annihilate + A(k+i+1:n,i) +*/ + + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + ei = a[*k + i__ + i__ * a_dim1]; + a[*k + i__ + i__ * a_dim1] = 1.; + +/* Compute Y(1:n,i) */ + + i__2 = *n - *k - i__ + 1; + dgemv_("No transpose", n, &i__2, &c_b2453, &a[(i__ + 1) * a_dim1 + 1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2467, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[*k + i__ + a_dim1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2467, &t[i__ * + t_dim1 + 1], &c__1); + i__2 = i__ - 1; + dgemv_("No transpose", n, &i__2, &c_b2589, &y[y_offset], ldy, &t[i__ * + t_dim1 + 1], &c__1, &c_b2453, &y[i__ * y_dim1 + 1], &c__1); + dscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); + +/* Compute T(1:i,i) */ + + i__2 = i__ - 1; + d__1 = -tau[i__]; + dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + t[i__ + i__ * t_dim1] = tau[i__]; + +/* L10: */ + } + a[*k + *nb + *nb * a_dim1] = ei; + + return 0; + +/* End of DLAHRD */ + +} /* dlahrd_ */ + +/* Subroutine */ int dlaln2_(logical *ltrans, integer *na, integer *nw, + doublereal *smin, doublereal *ca, doublereal *a, integer *lda, + doublereal *d1, doublereal *d2, doublereal *b, integer *ldb, + doublereal *wr, doublereal *wi, doublereal *x, integer *ldx, + doublereal *scale, doublereal *xnorm, integer *info) +{ + /* Initialized data */ + + static logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; + static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; + static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, + 4,3,2,1 }; + + /* System generated locals */ + 
integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + static doublereal equiv_0[4], equiv_1[4]; + + /* Local variables */ + static integer j; +#define ci (equiv_0) +#define cr (equiv_1) + static doublereal bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, + cr21, cr22, li21, csi, ui11, lr21, ui12, ui22; +#define civ (equiv_0) + static doublereal csr, ur11, ur12, ur22; +#define crv (equiv_1) + static doublereal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; + static integer icmax; + static doublereal bnorm, cnorm, smini; + + extern /* Subroutine */ int dladiv_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *); + static doublereal bignum, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLALN2 solves a system of the form (ca A - w D ) X = s B + or (ca A' - w D) X = s B with possible scaling ("s") and + perturbation of A. (A' means A-transpose.) + + A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA + real diagonal matrix, w is a real or complex value, and X and B are + NA x 1 matrices -- real if w is real, complex if w is complex. NA + may be 1 or 2. + + If w is complex, X and B are represented as NA x 2 matrices, + the first column of each being the real part and the second + being the imaginary part. + + "s" is a scaling factor (.LE. 1), computed by DLALN2, which is + so chosen that X can be computed without overflow. X is further + scaled if necessary to assure that norm(ca A - w D)*norm(X) is less + than overflow. + + If both singular values of (ca A - w D) are less than SMIN, + SMIN*identity will be used instead of (ca A - w D). 
If only one + singular value is less than SMIN, one element of (ca A - w D) will be + perturbed enough to make the smallest singular value roughly SMIN. + If both singular values are at least SMIN, (ca A - w D) will not be + perturbed. In any case, the perturbation will be at most some small + multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values + are computed by infinity-norm approximations, and thus will only be + correct to a factor of 2 or so. + + Note: all input quantities are assumed to be smaller than overflow + by a reasonable factor. (See BIGNUM.) + + Arguments + ========== + + LTRANS (input) LOGICAL + =.TRUE.: A-transpose will be used. + =.FALSE.: A will be used (not transposed.) + + NA (input) INTEGER + The size of the matrix A. It may (only) be 1 or 2. + + NW (input) INTEGER + 1 if "w" is real, 2 if "w" is complex. It may only be 1 + or 2. + + SMIN (input) DOUBLE PRECISION + The desired lower bound on the singular values of A. This + should be a safe distance away from underflow or overflow, + say, between (underflow/machine precision) and (machine + precision * overflow ). (See BIGNUM and ULP.) + + CA (input) DOUBLE PRECISION + The coefficient c, which A is multiplied by. + + A (input) DOUBLE PRECISION array, dimension (LDA,NA) + The NA x NA matrix A. + + LDA (input) INTEGER + The leading dimension of A. It must be at least NA. + + D1 (input) DOUBLE PRECISION + The 1,1 element in the diagonal matrix D. + + D2 (input) DOUBLE PRECISION + The 2,2 element in the diagonal matrix D. Not used if NW=1. + + B (input) DOUBLE PRECISION array, dimension (LDB,NW) + The NA x NW matrix B (right-hand side). If NW=2 ("w" is + complex), column 1 contains the real part of B and column 2 + contains the imaginary part. + + LDB (input) INTEGER + The leading dimension of B. It must be at least NA. + + WR (input) DOUBLE PRECISION + The real part of the scalar "w". + + WI (input) DOUBLE PRECISION + The imaginary part of the scalar "w". Not used if NW=1. 
+ + X (output) DOUBLE PRECISION array, dimension (LDX,NW) + The NA x NW matrix X (unknowns), as computed by DLALN2. + If NW=2 ("w" is complex), on exit, column 1 will contain + the real part of X and column 2 will contain the imaginary + part. + + LDX (input) INTEGER + The leading dimension of X. It must be at least NA. + + SCALE (output) DOUBLE PRECISION + The scale factor that B must be multiplied by to insure + that overflow does not occur when computing X. Thus, + (ca A - w D) X will be SCALE*B, not B (ignoring + perturbations of A.) It will be at most 1. + + XNORM (output) DOUBLE PRECISION + The infinity-norm of X, when X is regarded as an NA x NW + real matrix. + + INFO (output) INTEGER + An error flag. It will be set to zero if no error occurs, + a negative number if an argument is in error, or a positive + number if ca A - w D had to be perturbed. + The possible values are: + = 0: No error occurred, and (ca A - w D) did not have to be + perturbed. + = 1: (ca A - w D) had to be perturbed to make its smallest + (or only) singular value greater than SMIN. + NOTE: In the interests of speed, this routine does not + check the inputs for errors. + + ===================================================================== +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body */ + +/* Compute BIGNUM */ + + smlnum = 2. * SAFEMINIMUM; + bignum = 1. / smlnum; + smini = max(*smin,smlnum); + +/* Don't check for input errors */ + + *info = 0; + +/* Standard Initializations */ + + *scale = 1.; + + if (*na == 1) { + +/* 1 x 1 (i.e., scalar) system C X = B */ + + if (*nw == 1) { + +/* + Real 1x1 system. 
+ + C = ca A - w D +*/ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + cnorm = abs(csr); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. / bnorm; + } + } + +/* Compute X */ + + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + } else { + +/* + Complex 1x1 system (w is complex) + + C = ca A - w D +*/ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + csi = -(*wi) * *d1; + cnorm = abs(csr) + abs(csi); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + csi = 0.; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 << + 1) + 1], abs(d__2)); + if (cnorm < 1. && bnorm > 1.) { + if (bnorm > bignum * cnorm) { + *scale = 1. / bnorm; + } + } + +/* Compute X */ + + d__1 = *scale * b[b_dim1 + 1]; + d__2 = *scale * b[(b_dim1 << 1) + 1]; + dladiv_(&d__1, &d__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) + + 1]); + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << + 1) + 1], abs(d__2)); + } + + } else { + +/* + 2x2 System + + Compute the real part of C = ca A - w D (or ca A' - w D ) +*/ + + cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; + cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; + if (*ltrans) { + cr[2] = *ca * a[a_dim1 + 2]; + cr[1] = *ca * a[(a_dim1 << 1) + 1]; + } else { + cr[1] = *ca * a[a_dim1 + 2]; + cr[2] = *ca * a[(a_dim1 << 1) + 1]; + } + + if (*nw == 1) { + +/* + Real 2x2 system (w is real) + + Find the largest element in C +*/ + + cmax = 0.; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)); + icmax = j; + } +/* L10: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. 
*/ + + if (cmax < smini) { +/* Computing MAX */ + d__3 = (d__1 = b[b_dim1 + 1], abs(d__1)), d__4 = (d__2 = b[ + b_dim1 + 2], abs(d__2)); + bnorm = max(d__3,d__4); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. */ + + ur11 = crv[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ur11r = 1. / ur11; + lr21 = ur11r * cr21; + ur22 = cr22 - ur12 * lr21; + +/* If smaller pivot < SMINI, use SMINI */ + + if (abs(ur22) < smini) { + ur22 = smini; + *info = 1; + } + if (rswap[icmax - 1]) { + br1 = b[b_dim1 + 2]; + br2 = b[b_dim1 + 1]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + } + br2 -= lr21 * br1; +/* Computing MAX */ + d__2 = (d__1 = br1 * (ur22 * ur11r), abs(d__1)), d__3 = abs(br2); + bbnd = max(d__2,d__3); + if (bbnd > 1. && abs(ur22) < 1.) { + if (bbnd >= bignum * abs(ur22)) { + *scale = 1. / bbnd; + } + } + + xr2 = br2 * *scale / ur22; + xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + } +/* Computing MAX */ + d__1 = abs(xr1), d__2 = abs(xr2); + *xnorm = max(d__1,d__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1. && cmax > 1.) 
{ + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } else { + +/* + Complex 2x2 system (w is complex) + + Find the largest element in C +*/ + + ci[0] = -(*wi) * *d1; + ci[1] = 0.; + ci[2] = 0.; + ci[3] = -(*wi) * *d2; + cmax = 0.; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs( + d__2)) > cmax) { + cmax = (d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1] + , abs(d__2)); + icmax = j; + } +/* L20: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. */ + + if (cmax < smini) { +/* Computing MAX */ + d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[(b_dim1 + << 1) + 1], abs(d__2)), d__6 = (d__3 = b[b_dim1 + 2], + abs(d__3)) + (d__4 = b[(b_dim1 << 1) + 2], abs(d__4)); + bnorm = max(d__5,d__6); + if (smini < 1. && bnorm > 1.) { + if (bnorm > bignum * smini) { + *scale = 1. / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. */ + + ur11 = crv[icmax - 1]; + ui11 = civ[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; + if (icmax == 1 || icmax == 4) { + +/* Code when off-diagonals of pivoted C are real */ + + if (abs(ur11) > abs(ui11)) { + temp = ui11 / ur11; +/* Computing 2nd power */ + d__1 = temp; + ur11r = 1. / (ur11 * (d__1 * d__1 + 1.)); + ui11r = -temp * ur11r; + } else { + temp = ur11 / ui11; +/* Computing 2nd power */ + d__1 = temp; + ui11r = -1. 
/ (ui11 * (d__1 * d__1 + 1.)); + ur11r = -temp * ui11r; + } + lr21 = cr21 * ur11r; + li21 = cr21 * ui11r; + ur12s = ur12 * ur11r; + ui12s = ur12 * ui11r; + ur22 = cr22 - ur12 * lr21; + ui22 = ci22 - ur12 * li21; + } else { + +/* Code when diagonals of pivoted C are real */ + + ur11r = 1. / ur11; + ui11r = 0.; + lr21 = cr21 * ur11r; + li21 = ci21 * ur11r; + ur12s = ur12 * ur11r; + ui12s = ui12 * ur11r; + ur22 = cr22 - ur12 * lr21 + ui12 * li21; + ui22 = -ur12 * li21 - ui12 * lr21; + } + u22abs = abs(ur22) + abs(ui22); + +/* If smaller pivot < SMINI, use SMINI */ + + if (u22abs < smini) { + ur22 = smini; + ui22 = 0.; + *info = 1; + } + if (rswap[icmax - 1]) { + br2 = b[b_dim1 + 1]; + br1 = b[b_dim1 + 2]; + bi2 = b[(b_dim1 << 1) + 1]; + bi1 = b[(b_dim1 << 1) + 2]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + bi1 = b[(b_dim1 << 1) + 1]; + bi2 = b[(b_dim1 << 1) + 2]; + } + br2 = br2 - lr21 * br1 + li21 * bi1; + bi2 = bi2 - li21 * br1 - lr21 * bi1; +/* Computing MAX */ + d__1 = (abs(br1) + abs(bi1)) * (u22abs * (abs(ur11r) + abs(ui11r)) + ), d__2 = abs(br2) + abs(bi2); + bbnd = max(d__1,d__2); + if (bbnd > 1. && u22abs < 1.) { + if (bbnd >= bignum * u22abs) { + *scale = 1. / bbnd; + br1 = *scale * br1; + bi1 = *scale * bi1; + br2 = *scale * br2; + bi2 = *scale * bi2; + } + } + + dladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); + xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; + xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; + if (zswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + x[(x_dim1 << 1) + 1] = xi2; + x[(x_dim1 << 1) + 2] = xi1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + x[(x_dim1 << 1) + 1] = xi1; + x[(x_dim1 << 1) + 2] = xi2; + } +/* Computing MAX */ + d__1 = abs(xr1) + abs(xi1), d__2 = abs(xr2) + abs(xi2); + *xnorm = max(d__1,d__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1. && cmax > 1.) 
{ + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } + } + + return 0; + +/* End of DLALN2 */ + +} /* dlaln2_ */ + +#undef crv +#undef civ +#undef cr +#undef ci + + +/* Subroutine */ int dlals0_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal + *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal * + poles, doublereal *difl, doublereal *difr, doublereal *z__, integer * + k, doublereal *c__, doublereal *s, doublereal *work, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, + difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, + poles_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j, m, n; + static doublereal dj; + static integer nlp1; + static doublereal temp; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal diflj, difrj, dsigj; + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dcopy_(integer *, + doublereal *, integer *, doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), 
dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + xerbla_(char *, integer *); + static doublereal dsigjp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + December 1, 1999 + + + Purpose + ======= + + DLALS0 applies back the multiplying factors of either the left or the + right singular vector matrix of a diagonal matrix appended by a row + to the right hand side matrix B in solving the least squares problem + using the divide-and-conquer SVD approach. + + For the left singular vector matrix, three types of orthogonal + matrices are involved: + + (1L) Givens rotations: the number of such rotations is GIVPTR; the + pairs of columns/rows they were applied to are stored in GIVCOL; + and the C- and S-values of these rotations are stored in GIVNUM. + + (2L) Permutation. The (NL+1)-st row of B is to be moved to the first + row, and for J=2:N, PERM(J)-th row of B is to be moved to the + J-th row. + + (3L) The left singular vector matrix of the remaining matrix. + + For the right singular vector matrix, four types of orthogonal + matrices are involved: + + (1R) The right singular vector matrix of the remaining matrix. + + (2R) If SQRE = 1, one extra Givens rotation to generate the right + null space. + + (3R) The inverse transformation of (2L). + + (4R) The inverse transformation of (1L). + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form: + = 0: Left singular vector matrix. + = 1: Right singular vector matrix. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. 
+ + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + NRHS (input) INTEGER + The number of columns of B and BX. NRHS must be at least 1. + + B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) + On input, B contains the right hand sides of the least + squares problem in rows 1 through M. On output, B contains + the solution X in rows 1 through N. + + LDB (input) INTEGER + The leading dimension of B. LDB must be at least + max(1,MAX( M, N ) ). + + BX (workspace) DOUBLE PRECISION array, dimension ( LDBX, NRHS ) + + LDBX (input) INTEGER + The leading dimension of BX. + + PERM (input) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) applied + to the two blocks. + + GIVPTR (input) INTEGER + The number of Givens rotations which took place in this + subproblem. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of rows/columns + involved in a Givens rotation. + + LDGCOL (input) INTEGER + The leading dimension of GIVCOL, must be at least N. + + GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value used in the + corresponding Givens rotation. + + LDGNUM (input) INTEGER + The leading dimension of arrays DIFR, POLES and + GIVNUM, must be at least K. + + POLES (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + On entry, POLES(1:K, 1) contains the new singular + values obtained from solving the secular equation, and + POLES(1:K, 2) is an array containing the poles in the secular + equation. + + DIFL (input) DOUBLE PRECISION array, dimension ( K ). + On entry, DIFL(I) is the distance between I-th updated + (undeflated) singular value and the I-th (undeflated) old + singular value. + + DIFR (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). 
+ On entry, DIFR(I, 1) contains the distances between I-th + updated (undeflated) singular value and the I+1-th + (undeflated) old singular value. And DIFR(I, 2) is the + normalizing factor for the I-th right singular vector. + + Z (input) DOUBLE PRECISION array, dimension ( K ) + Contain the components of the deflation-adjusted updating row + vector. + + K (input) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + C (input) DOUBLE PRECISION + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (input) DOUBLE PRECISION + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + WORK (workspace) DOUBLE PRECISION array, dimension ( K ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + difr_dim1 = *ldgnum; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --z__; + --work; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } + + n = *nl + *nr + 1; + + if (*nrhs < 1) { + *info = -5; + } else if (*ldb < n) { + *info = -7; + } else if (*ldbx < n) { + *info = -9; + } else if (*givptr < 0) { + *info = -11; + } else if (*ldgcol < n) { + *info = -13; + } else if (*ldgnum < n) { + *info = -15; + } else if (*k < 1) { + *info = -20; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLALS0", &i__1); + return 0; + } + + m = n + *sqre; + nlp1 = *nl + 1; + + if (*icompq == 0) { + +/* + Apply back orthogonal transformations from the left. + + Step (1L): apply back the Givens rotations performed. +*/ + + i__1 = *givptr; + for (i__ = 1; i__ <= i__1; ++i__) { + drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); +/* L10: */ + } + +/* Step (2L): permute rows of B. */ + + dcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], + ldbx); +/* L20: */ + } + +/* + Step (3L): apply the inverse of the left singular vector + matrix to BX. 
+*/ + + if (*k == 1) { + dcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); + if (z__[1] < 0.) { + dscal_(nrhs, &c_b2589, &b[b_offset], ldb); + } + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = poles[j + poles_dim1]; + dsigj = -poles[j + (poles_dim1 << 1)]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; + } + if (z__[j] == 0. || poles[j + (poles_dim1 << 1)] == 0.) { + work[j] = 0.; + } else { + work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj / + (poles[j + (poles_dim1 << 1)] + dj); + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + work[i__] = 0.; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigj) - diflj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L30: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[i__] == 0. || poles[i__ + (poles_dim1 << 1)] == + 0.) { + work[i__] = 0.; + } else { + work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] + / (dlamc3_(&poles[i__ + (poles_dim1 << 1)], & + dsigjp) + difrj) / (poles[i__ + (poles_dim1 << + 1)] + dj); + } +/* L40: */ + } + work[1] = -1.; + temp = dnrm2_(k, &work[1], &c__1); + dgemv_("T", k, nrhs, &c_b2453, &bx[bx_offset], ldbx, &work[1], + &c__1, &c_b2467, &b[j + b_dim1], ldb); + dlascl_("G", &c__0, &c__0, &temp, &c_b2453, &c__1, nrhs, &b[j + + b_dim1], ldb, info); +/* L50: */ + } + } + +/* Move the deflated rows of BX to B also. */ + + if (*k < max(m,n)) { + i__1 = n - *k; + dlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 + + b_dim1], ldb); + } + } else { + +/* + Apply back the right orthogonal transformations. + + Step (1R): apply back the new right singular vector matrix + to B. 
+*/ + + if (*k == 1) { + dcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); + } else { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dsigj = poles[j + (poles_dim1 << 1)]; + if (z__[j] == 0.) { + work[j] = 0.; + } else { + work[j] = -z__[j] / difl[j] / (dsigj + poles[j + + poles_dim1]) / difr[j + (difr_dim1 << 1)]; + } + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + work[i__] = 0.; + } else { + d__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; + work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[ + i__ + difr_dim1]) / (dsigj + poles[i__ + + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; + } +/* L60: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + if (z__[j] == 0.) { + work[i__] = 0.; + } else { + d__1 = -poles[i__ + (poles_dim1 << 1)]; + work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[ + i__]) / (dsigj + poles[i__ + poles_dim1]) / + difr[i__ + (difr_dim1 << 1)]; + } +/* L70: */ + } + dgemv_("T", k, nrhs, &c_b2453, &b[b_offset], ldb, &work[1], & + c__1, &c_b2467, &bx[j + bx_dim1], ldbx); +/* L80: */ + } + } + +/* + Step (2R): if SQRE = 1, apply back the rotation that is + related to the right null space of the subproblem. +*/ + + if (*sqre == 1) { + dcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); + drot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, + s); + } + if (*k < max(m,n)) { + i__1 = n - *k; + dlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + + bx_dim1], ldbx); + } + +/* Step (3R): permute rows of B. */ + + dcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); + if (*sqre == 1) { + dcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); + } + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], + ldb); +/* L90: */ + } + +/* Step (4R): apply back the Givens rotations performed. 
*/ + + for (i__ = *givptr; i__ >= 1; --i__) { + d__1 = -givnum[i__ + givnum_dim1]; + drot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & + b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + + (givnum_dim1 << 1)], &d__1); +/* L100: */ + } + } + + return 0; + +/* End of DLALS0 */ + +} /* dlals0_ */ + +/* Subroutine */ int dlalsa_(integer *icompq, integer *smlsiz, integer *n, + integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer * + ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k, + doublereal *difl, doublereal *difr, doublereal *z__, doublereal * + poles, integer *givptr, integer *givcol, integer *ldgcol, integer * + perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal * + work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1, + b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1, + difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, + u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1, + i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl, + ndb1, nlp1, lvl2, nrp1, nlvl, sqre; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer inode, ndiml, ndimr; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlals0_(integer *, integer *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + integer *), dlasdt_(integer *, integer *, integer *, integer *, 
+	    integer *, integer *, integer *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLALSA is an intermediate step in solving the least squares problem
+    by computing the SVD of the coefficient matrix in compact form (The
+    singular vectors are computed as products of simple orthogonal
+    matrices.).
+
+    If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector
+    matrix of an upper bidiagonal matrix to the right hand side; and if
+    ICOMPQ = 1, DLALSA applies the right singular vector matrix to the
+    right hand side. The singular vector matrices were generated in
+    compact form by DLALSA.
+
+    Arguments
+    =========
+
+
+    ICOMPQ (input) INTEGER
+         Specifies whether the left or the right singular vector
+         matrix is involved.
+         = 0: Left singular vector matrix
+         = 1: Right singular vector matrix
+
+    SMLSIZ (input) INTEGER
+         The maximum size of the subproblems at the bottom of the
+         computation tree.
+
+    N      (input) INTEGER
+         The row and column dimensions of the upper bidiagonal matrix.
+
+    NRHS   (input) INTEGER
+         The number of columns of B and BX. NRHS must be at least 1.
+
+    B      (input) DOUBLE PRECISION array, dimension ( LDB, NRHS )
+         On input, B contains the right hand sides of the least
+         squares problem in rows 1 through M. On output, B contains
+         the solution X in rows 1 through N.
+
+    LDB    (input) INTEGER
+         The leading dimension of B in the calling subprogram.
+         LDB must be at least max(1,MAX( M, N ) ).
+
+    BX     (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS )
+         On exit, the result of applying the left or right singular
+         vector matrix to B.
+
+    LDBX   (input) INTEGER
+         The leading dimension of BX.
+
+    U      (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ).
+         On entry, U contains the left singular vector matrices of all
+         subproblems at the bottom level. 
+ + LDU (input) INTEGER, LDU = > N. + The leading dimension of arrays U, VT, DIFL, DIFR, + POLES, GIVNUM, and Z. + + VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). + On entry, VT' contains the right singular vector matrices of + all subproblems at the bottom level. + + K (input) INTEGER array, dimension ( N ). + + DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). + where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. + + DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record + distances between singular values on the I-th level and + singular values on the (I -1)-th level, and DIFR(*, 2 * I) + record the normalizing factors of the right singular vectors + matrices of subproblems on I-th level. + + Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ). + On entry, Z(1, I) contains the components of the deflation- + adjusted updating row vector for subproblems on the I-th + level. + + POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). + On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old + singular values involved in the secular equations on the I-th + level. + + GIVPTR (input) INTEGER array, dimension ( N ). + On entry, GIVPTR( I ) records the number of Givens + rotations performed on the I-th problem on the computation + tree. + + GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ). + On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the + locations of Givens rotations performed on the I-th level on + the computation tree. + + LDGCOL (input) INTEGER, LDGCOL = > N. + The leading dimension of arrays GIVCOL and PERM. + + PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ). + On entry, PERM(*, I) records permutations done on the I-th + level of the computation tree. + + GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). 
+ On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- + values of Givens rotations performed on the I-th level on the + computation tree. + + C (input) DOUBLE PRECISION array, dimension ( N ). + On entry, if the I-th subproblem is not square, + C( I ) contains the C-value of a Givens rotation related to + the right null space of the I-th subproblem. + + S (input) DOUBLE PRECISION array, dimension ( N ). + On entry, if the I-th subproblem is not square, + S( I ) contains the S-value of a Givens rotation related to + the right null space of the I-th subproblem. + + WORK (workspace) DOUBLE PRECISION array. + The dimension must be at least N. + + IWORK (workspace) INTEGER array. + The dimension must be at least 3 * N + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + bx_dim1 = *ldbx; + bx_offset = 1 + bx_dim1; + bx -= bx_offset; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < *smlsiz) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < *n) { + *info = -6; + } else if (*ldbx < *n) { + *info = -8; + } else if (*ldu < *n) { + *info = -10; + } else if (*ldgcol < *n) { + *info = -19; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLALSA", &i__1); + return 0; + } + +/* Book-keeping and setting up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + The following code applies back the left singular vector factors. + For applying back the right singular vector factors, go to 50. +*/ + + if (*icompq == 1) { + goto L50; + } + +/* + The nodes on the bottom level of the tree were solved + by DLASDQ. The corresponding left and right singular vector + matrices are in explicit form. First apply back the left + singular vector matrices. 
+*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + dgemm_("T", "N", &nl, nrhs, &nl, &c_b2453, &u[nlf + u_dim1], ldu, &b[ + nlf + b_dim1], ldb, &c_b2467, &bx[nlf + bx_dim1], ldbx); + dgemm_("T", "N", &nr, nrhs, &nr, &c_b2453, &u[nrf + u_dim1], ldu, &b[ + nrf + b_dim1], ldb, &c_b2467, &bx[nrf + bx_dim1], ldbx); +/* L10: */ + } + +/* + Next copy the rows of B that correspond to unchanged rows + in the bidiagonal matrix to BX. +*/ + + i__1 = nd; + for (i__ = 1; i__ <= i__1; ++i__) { + ic = iwork[inode + i__ - 1]; + dcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx); +/* L20: */ + } + +/* + Finally go through the left singular vector matrices of all + the other subproblems bottom-up on the tree. 
+*/ + + j = pow_ii(&c__2, &nlvl); + sqre = 0; + + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* + find the first node LF and last node LL on + the current level LVL +*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + --j; + dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, & + b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L30: */ + } +/* L40: */ + } + goto L90; + +/* ICOMPQ = 1: applying back the right singular vector factors. */ + +L50: + +/* + First now go through the right singular vector matrices of all + the tree nodes top-down. +*/ + + j = 0; + i__1 = nlvl; + for (lvl = 1; lvl <= i__1; ++lvl) { + lvl2 = (lvl << 1) - 1; + +/* + Find the first node LF and last node LL on + the current level LVL. 
+*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__2 = lvl - 1; + lf = pow_ii(&c__2, &i__2); + ll = (lf << 1) - 1; + } + i__2 = lf; + for (i__ = ll; i__ >= i__2; --i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqre = 0; + } else { + sqre = 1; + } + ++j; + dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[ + nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], & + givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, & + givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 * + poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf + + lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[ + j], &s[j], &work[1], info); +/* L60: */ + } +/* L70: */ + } + +/* + The nodes on the bottom level of the tree were solved + by DLASDQ. The corresponding right singular vector + matrices are in explicit form. Apply them back. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nr = iwork[ndimr + i1]; + nlp1 = nl + 1; + if (i__ == nd) { + nrp1 = nr; + } else { + nrp1 = nr + 1; + } + nlf = ic - nl; + nrf = ic + 1; + dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b2453, &vt[nlf + vt_dim1], + ldu, &b[nlf + b_dim1], ldb, &c_b2467, &bx[nlf + bx_dim1], + ldbx); + dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b2453, &vt[nrf + vt_dim1], + ldu, &b[nrf + b_dim1], ldb, &c_b2467, &bx[nrf + bx_dim1], + ldbx); +/* L80: */ + } + +L90: + + return 0; + +/* End of DLALSA */ + +} /* dlalsa_ */ + +/* Subroutine */ int dlalsd_(char *uplo, integer *smlsiz, integer *n, integer + *nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb, + doublereal *rcond, integer *rank, doublereal *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer b_dim1, b_offset, i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double log(doublereal), 
d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer c__, i__, j, k; + static doublereal r__; + static integer s, u, z__; + static doublereal cs; + static integer bx; + static doublereal sn; + static integer st, vt, nm1, st1; + static doublereal eps; + static integer iwk; + static doublereal tol; + static integer difl, difr, perm, nsub; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer nlvl, sqre, bxst; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), + dcopy_(integer *, doublereal *, integer *, doublereal *, integer + *); + static integer poles, sizei, nsize, nwork, icmpq1, icmpq2; + + extern /* Subroutine */ int dlasda_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *), dlalsa_(integer *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlacpy_(char *, integer 
*, + integer *, doublereal *, integer *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *), dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *); + static integer givcol; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static doublereal orgnrm; + static integer givnum, givptr, smlszp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLALSD uses the singular value decomposition of A to solve the least + squares problem of finding X to minimize the Euclidean norm of each + column of A*X-B, where A is N-by-N upper bidiagonal, and X and B + are N-by-NRHS. The solution X overwrites B. + + The singular values of A smaller than RCOND times the largest + singular value are treated as zero in solving the least squares + problem; in this case a minimum norm solution is returned. + The actual singular values are returned in D in ascending order. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': D and E define an upper bidiagonal matrix. + = 'L': D and E define a lower bidiagonal matrix. + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The dimension of the bidiagonal matrix. N >= 0. 
+ + NRHS (input) INTEGER + The number of columns of B. NRHS must be at least 1. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry D contains the main diagonal of the bidiagonal + matrix. On exit, if INFO = 0, D contains its singular values. + + E (input) DOUBLE PRECISION array, dimension (N-1) + Contains the super-diagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On input, B contains the right hand sides of the least + squares problem. On output, B contains the solution X. + + LDB (input) INTEGER + The leading dimension of B in the calling subprogram. + LDB must be at least max(1,N). + + RCOND (input) DOUBLE PRECISION + The singular values of A less than or equal to RCOND times + the largest singular value are treated as zero in solving + the least squares problem. If RCOND is negative, + machine precision is used instead. + For example, if diag(S)*X=B were the least squares problem, + where diag(S) is a diagonal matrix of singular values, the + solution would be X(i) = B(i) / S(i) if S(i) is greater than + RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to + RCOND*max(S). + + RANK (output) INTEGER + The number of singular values of A greater than RCOND times + the largest singular value. + + WORK (workspace) DOUBLE PRECISION array, dimension at least + (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), + where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). + + IWORK (workspace) INTEGER array, dimension at least + (3*N*NLVL + 11*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an singular value while + working on the submatrix lying in rows and columns + INFO/(N+1) through MOD(INFO,N+1). 
+ + Further Details + =============== + + Based on contributions by + Ming Gu and Ren-Cang Li, Computer Science Division, University of + California at Berkeley, USA + Osni Marques, LBNL/NERSC, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -3; + } else if (*nrhs < 1) { + *info = -4; + } else if (*ldb < 1 || *ldb < *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLALSD", &i__1); + return 0; + } + + eps = EPSILON; + +/* Set up the tolerance. */ + + if (*rcond <= 0. || *rcond >= 1.) { + *rcond = eps; + } + + *rank = 0; + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } else if (*n == 1) { + if (d__[1] == 0.) { + dlaset_("A", &c__1, nrhs, &c_b2467, &c_b2467, &b[b_offset], ldb); + } else { + *rank = 1; + dlascl_("G", &c__0, &c__0, &d__[1], &c_b2453, &c__1, nrhs, &b[ + b_offset], ldb, info); + d__[1] = abs(d__[1]); + } + return 0; + } + +/* Rotate the matrix if it is lower bidiagonal. */ + + if (*(unsigned char *)uplo == 'L') { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (*nrhs == 1) { + drot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], & + c__1, &cs, &sn); + } else { + work[(i__ << 1) - 1] = cs; + work[i__ * 2] = sn; + } +/* L10: */ + } + if (*nrhs > 1) { + i__1 = *nrhs; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *n - 1; + for (j = 1; j <= i__2; ++j) { + cs = work[(j << 1) - 1]; + sn = work[j * 2]; + drot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ * + b_dim1], &c__1, &cs, &sn); +/* L20: */ + } +/* L30: */ + } + } + } + +/* Scale. 
*/ + + nm1 = *n - 1; + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + dlaset_("A", n, nrhs, &c_b2467, &c_b2467, &b[b_offset], ldb); + return 0; + } + + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, n, &c__1, &d__[1], n, info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, &nm1, &c__1, &e[1], &nm1, + info); + +/* + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. +*/ + + if (*n <= *smlsiz) { + nwork = *n * *n + 1; + dlaset_("A", n, n, &c_b2467, &c_b2453, &work[1], n); + dlasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, & + work[1], n, &b[b_offset], ldb, &work[nwork], info); + if (*info != 0) { + return 0; + } + tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] <= tol) { + dlaset_("A", &c__1, nrhs, &c_b2467, &c_b2467, &b[i__ + b_dim1] + , ldb); + } else { + dlascl_("G", &c__0, &c__0, &d__[i__], &c_b2453, &c__1, nrhs, & + b[i__ + b_dim1], ldb, info); + ++(*rank); + } +/* L40: */ + } + dgemm_("T", "N", n, nrhs, n, &c_b2453, &work[1], n, &b[b_offset], ldb, + &c_b2467, &work[nwork], n); + dlacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb); + +/* Unscale. */ + + dlascl_("G", &c__0, &c__0, &c_b2453, &orgnrm, n, &c__1, &d__[1], n, + info); + dlasrt_("D", n, &d__[1], info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, n, nrhs, &b[b_offset], + ldb, info); + + return 0; + } + +/* Book-keeping and setting up some constants. 
*/ + + nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) / + log(2.)) + 1; + + smlszp = *smlsiz + 1; + + u = 1; + vt = *smlsiz * *n + 1; + difl = vt + smlszp * *n; + difr = difl + nlvl * *n; + z__ = difr + (nlvl * *n << 1); + c__ = z__ + nlvl * *n; + s = c__ + *n; + poles = s + *n; + givnum = poles + (nlvl << 1) * *n; + bx = givnum + (nlvl << 1) * *n; + nwork = bx + *n * *nrhs; + + sizei = *n + 1; + k = sizei + *n; + givptr = k + *n; + perm = givptr + *n; + givcol = perm + nlvl * *n; + iwk = givcol + (nlvl * *n << 1); + + st = 1; + sqre = 0; + icmpq1 = 1; + icmpq2 = 0; + nsub = 0; + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) < eps) { + d__[i__] = d_sign(&eps, &d__[i__]); + } +/* L50: */ + } + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = e[i__], abs(d__1)) < eps || i__ == nm1) { + ++nsub; + iwork[nsub] = st; + +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else if ((d__1 = e[i__], abs(d__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - st + 1; + iwork[sizei + nsub - 1] = nsize; + } else { + +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N), which is not solved + explicitly. +*/ + + nsize = i__ - st + 1; + iwork[sizei + nsub - 1] = nsize; + ++nsub; + iwork[nsub] = *n; + iwork[sizei + nsub - 1] = 1; + dcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n); + } + st1 = st - 1; + if (nsize == 1) { + +/* + This is a 1-by-1 subproblem and is not solved + explicitly. +*/ + + dcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n); + } else if (nsize <= *smlsiz) { + +/* This is a small subproblem and is solved by DLASDQ. 
*/ + + dlaset_("A", &nsize, &nsize, &c_b2467, &c_b2453, &work[vt + + st1], n); + dlasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[ + st], &work[vt + st1], n, &work[nwork], n, &b[st + + b_dim1], ldb, &work[nwork], info); + if (*info != 0) { + return 0; + } + dlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx + + st1], n); + } else { + +/* A large problem. Solve it using divide and conquer. */ + + dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], & + work[u + st1], n, &work[vt + st1], &iwork[k + st1], & + work[difl + st1], &work[difr + st1], &work[z__ + st1], + &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + + st1], &work[c__ + st1], &work[s + st1], &work[nwork], + &iwork[iwk], info); + if (*info != 0) { + return 0; + } + bxst = bx + st1; + dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, & + work[bxst], n, &work[u + st1], n, &work[vt + st1], & + iwork[k + st1], &work[difl + st1], &work[difr + st1], + &work[z__ + st1], &work[poles + st1], &iwork[givptr + + st1], &iwork[givcol + st1], n, &iwork[perm + st1], & + work[givnum + st1], &work[c__ + st1], &work[s + st1], + &work[nwork], &iwork[iwk], info); + if (*info != 0) { + return 0; + } + } + st = i__ + 1; + } +/* L60: */ + } + +/* Apply the singular values and treat the tiny ones as zero. */ + + tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Some of the elements in D can be negative because 1-by-1 + subproblems were not solved explicitly. +*/ + + if ((d__1 = d__[i__], abs(d__1)) <= tol) { + dlaset_("A", &c__1, nrhs, &c_b2467, &c_b2467, &work[bx + i__ - 1], + n); + } else { + ++(*rank); + dlascl_("G", &c__0, &c__0, &d__[i__], &c_b2453, &c__1, nrhs, & + work[bx + i__ - 1], n, info); + } + d__[i__] = (d__1 = d__[i__], abs(d__1)); +/* L70: */ + } + +/* Now apply back the right singular vectors. 
*/ + + icmpq2 = 1; + i__1 = nsub; + for (i__ = 1; i__ <= i__1; ++i__) { + st = iwork[i__]; + st1 = st - 1; + nsize = iwork[sizei + i__ - 1]; + bxst = bx + st1; + if (nsize == 1) { + dcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb); + } else if (nsize <= *smlsiz) { + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b2453, &work[vt + st1], + n, &work[bxst], n, &c_b2467, &b[st + b_dim1], ldb); + } else { + dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st + + b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[ + k + st1], &work[difl + st1], &work[difr + st1], &work[z__ + + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[ + givcol + st1], n, &iwork[perm + st1], &work[givnum + st1], + &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[ + iwk], info); + if (*info != 0) { + return 0; + } + } +/* L80: */ + } + +/* Unscale and sort the singular values. */ + + dlascl_("G", &c__0, &c__0, &c_b2453, &orgnrm, n, &c__1, &d__[1], n, info); + dlasrt_("D", n, &d__[1], info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, n, nrhs, &b[b_offset], ldb, + info); + + return 0; + +/* End of DLALSD */ + +} /* dlalsd_ */ + +/* Subroutine */ int dlamrg_(integer *n1, integer *n2, doublereal *a, integer + *dtrd1, integer *dtrd2, integer *index) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, ind1, ind2, n1sv, n2sv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DLAMRG will create a permutation list which will merge the elements + of A (which is composed of two independently sorted sets) into a + single set which is sorted in ascending order. + + Arguments + ========= + + N1 (input) INTEGER + N2 (input) INTEGER + These arguements contain the respective lengths of the two + sorted lists to be merged. 
+ + A (input) DOUBLE PRECISION array, dimension (N1+N2) + The first N1 elements of A contain a list of numbers which + are sorted in either ascending or descending order. Likewise + for the final N2 elements. + + DTRD1 (input) INTEGER + DTRD2 (input) INTEGER + These are the strides to be taken through the array A. + Allowable strides are 1 and -1. They indicate whether a + subset of A is sorted in ascending (DTRDx = 1) or descending + (DTRDx = -1) order. + + INDEX (output) INTEGER array, dimension (N1+N2) + On exit this array will contain a permutation such that + if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be + sorted in ascending order. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --index; + --a; + + /* Function Body */ + n1sv = *n1; + n2sv = *n2; + if (*dtrd1 > 0) { + ind1 = 1; + } else { + ind1 = *n1; + } + if (*dtrd2 > 0) { + ind2 = *n1 + 1; + } else { + ind2 = *n1 + *n2; + } + i__ = 1; +/* while ( (N1SV > 0) & (N2SV > 0) ) */ +L10: + if (n1sv > 0 && n2sv > 0) { + if (a[ind1] <= a[ind2]) { + index[i__] = ind1; + ++i__; + ind1 += *dtrd1; + --n1sv; + } else { + index[i__] = ind2; + ++i__; + ind2 += *dtrd2; + --n2sv; + } + goto L10; + } +/* end while */ + if (n1sv == 0) { + i__1 = n2sv; + for (n1sv = 1; n1sv <= i__1; ++n1sv) { + index[i__] = ind2; + ++i__; + ind2 += *dtrd2; +/* L20: */ + } + } else { +/* N2SV .EQ. 
0 */ + i__1 = n1sv; + for (n2sv = 1; n2sv <= i__1; ++n2sv) { + index[i__] = ind1; + ++i__; + ind1 += *dtrd1; +/* L30: */ + } + } + + return 0; + +/* End of DLAMRG */ + +} /* dlamrg_ */ + +doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer + *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static doublereal sum, scale; + extern logical lsame_(char *, char *); + static doublereal value; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLANGE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real matrix A. + + Description + =========== + + DLANGE returns the value + + DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in DLANGE as described + above. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. When M = 0, + DLANGE is set to zero. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. When N = 0, + DLANGE is set to zero. 
+ + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(M,1). + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), + where LWORK >= M when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L30: */ + } + value = max(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). 
*/ + + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANGE */ + +} /* dlange_ */ + +doublereal dlanhs_(char *norm, integer *n, doublereal *a, integer *lda, + doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static doublereal sum, scale; + extern logical lsame_(char *, char *); + static doublereal value; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLANHS returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + Hessenberg matrix A. + + Description + =========== + + DLANHS returns the value + + DLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in DLANHS as described + above. + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, DLANHS is + set to zero. 
+ + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The n by n upper Hessenberg matrix A; the part of A below the + first sub-diagonal is not referenced. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), + where LWORK >= N when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L30: */ + } + value = max(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). 
*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANHS */ + +} /* dlanhs_ */ + +doublereal dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e) +{ + /* System generated locals */ + integer i__1; + doublereal ret_val, d__1, d__2, d__3, d__4, d__5; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__; + static doublereal sum, scale; + extern logical lsame_(char *, char *); + static doublereal anorm; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLANST returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real symmetric tridiagonal matrix A. 
+ + Description + =========== + + DLANST returns the value + + DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in DLANST as described + above. + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, DLANST is + set to zero. + + D (input) DOUBLE PRECISION array, dimension (N) + The diagonal elements of A. + + E (input) DOUBLE PRECISION array, dimension (N-1) + The (n-1) sub-diagonal or super-diagonal elements of A. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + if (*n <= 0) { + anorm = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + anorm = (d__1 = d__[*n], abs(d__1)); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = d__[i__], abs(d__1)); + anorm = max(d__2,d__3); +/* Computing MAX */ + d__2 = anorm, d__3 = (d__1 = e[i__], abs(d__1)); + anorm = max(d__2,d__3); +/* L10: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1' || lsame_(norm, "I")) { + +/* Find norm1(A). 
*/ + + if (*n == 1) { + anorm = abs(d__[1]); + } else { +/* Computing MAX */ + d__3 = abs(d__[1]) + abs(e[1]), d__4 = (d__1 = e[*n - 1], abs( + d__1)) + (d__2 = d__[*n], abs(d__2)); + anorm = max(d__3,d__4); + i__1 = *n - 1; + for (i__ = 2; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[ + i__], abs(d__2)) + (d__3 = e[i__ - 1], abs(d__3)); + anorm = max(d__4,d__5); +/* L20: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.; + sum = 1.; + if (*n > 1) { + i__1 = *n - 1; + dlassq_(&i__1, &e[1], &c__1, &scale, &sum); + sum *= 2; + } + dlassq_(n, &d__[1], &c__1, &scale, &sum); + anorm = scale * sqrt(sum); + } + + ret_val = anorm; + return ret_val; + +/* End of DLANST */ + +} /* dlanst_ */ + +doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer + *lda, doublereal *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal ret_val, d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static doublereal sum, absa, scale; + extern logical lsame_(char *, char *); + static doublereal value; + extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, + doublereal *, doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLANSY returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real symmetric matrix A. 
+ + Description + =========== + + DLANSY returns the value + + DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in DLANSY as described + above. + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is to be referenced. + = 'U': Upper triangular part of A is referenced + = 'L': Lower triangular part of A is referenced + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, DLANSY is + set to zero. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The symmetric matrix A. If UPLO = 'U', the leading n by n + upper triangular part of A contains the upper triangular part + of the matrix A, and the strictly lower triangular part of A + is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of A contains the lower triangular part of + the matrix A, and the strictly upper triangular part of A is + not referenced. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), + where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, + WORK is not referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). 
*/ + + value = 0.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { +/* Computing MAX */ + d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs( + d__1)); + value = max(d__2,d__3); +/* L30: */ + } +/* L40: */ + } + } + } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is symmetric). */ + + value = 0.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + sum += absa; + work[i__] += absa; +/* L50: */ + } + work[j] = sum + (d__1 = a[j + j * a_dim1], abs(d__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = value, d__2 = work[i__]; + value = max(d__1,d__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = work[j] + (d__1 = a[j + j * a_dim1], abs(d__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = (d__1 = a[i__ + j * a_dim1], abs(d__1)); + sum += absa; + work[i__] += absa; +/* L90: */ + } + value = max(value,sum); +/* L100: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). 
*/ + + scale = 0.; + sum = 1.; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + dlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *lda + 1; + dlassq_(n, &a[a_offset], &i__1, &scale, &sum); + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of DLANSY */ + +} /* dlansy_ */ + +/* Subroutine */ int dlanv2_(doublereal *a, doublereal *b, doublereal *c__, + doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r, + doublereal *rt2i, doublereal *cs, doublereal *sn) +{ + /* System generated locals */ + doublereal d__1, d__2; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *), sqrt(doublereal); + + /* Local variables */ + static doublereal p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, + temp, scale, bcmax, bcmis, sigma; + + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric + matrix in standard form: + + [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] + [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] + + where either + 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or + 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex + conjugate eigenvalues. + + Arguments + ========= + + A (input/output) DOUBLE PRECISION + B (input/output) DOUBLE PRECISION + C (input/output) DOUBLE PRECISION + D (input/output) DOUBLE PRECISION + On entry, the elements of the input matrix. + On exit, they are overwritten by the elements of the + standardised Schur form. 
+ + RT1R (output) DOUBLE PRECISION + RT1I (output) DOUBLE PRECISION + RT2R (output) DOUBLE PRECISION + RT2I (output) DOUBLE PRECISION + The real and imaginary parts of the eigenvalues. If the + eigenvalues are a complex conjugate pair, RT1I > 0. + + CS (output) DOUBLE PRECISION + SN (output) DOUBLE PRECISION + Parameters of the rotation matrix. + + Further Details + =============== + + Modified by V. Sima, Research Institute for Informatics, Bucharest, + Romania, to reduce the risk of cancellation errors, + when computing real eigenvalues, and to ensure, if possible, that + abs(RT1R) >= abs(RT2R). + + ===================================================================== +*/ + + + eps = PRECISION; + if (*c__ == 0.) { + *cs = 1.; + *sn = 0.; + goto L10; + + } else if (*b == 0.) { + +/* Swap rows and columns */ + + *cs = 0.; + *sn = 1.; + temp = *d__; + *d__ = *a; + *a = temp; + *b = -(*c__); + *c__ = 0.; + goto L10; + } else if (*a - *d__ == 0. && d_sign(&c_b2453, b) != d_sign(&c_b2453, c__) + ) { + *cs = 1.; + *sn = 0.; + goto L10; + } else { + + temp = *a - *d__; + p = temp * .5; +/* Computing MAX */ + d__1 = abs(*b), d__2 = abs(*c__); + bcmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = abs(*b), d__2 = abs(*c__); + bcmis = min(d__1,d__2) * d_sign(&c_b2453, b) * d_sign(&c_b2453, c__); +/* Computing MAX */ + d__1 = abs(p); + scale = max(d__1,bcmax); + z__ = p / scale * p + bcmax / scale * bcmis; + +/* + If Z is of the order of the machine accuracy, postpone the + decision on the nature of eigenvalues +*/ + + if (z__ >= eps * 4.) { + +/* Real eigenvalues. Compute A and D. */ + + d__1 = sqrt(scale) * sqrt(z__); + z__ = p + d_sign(&d__1, &p); + *a = *d__ + z__; + *d__ -= bcmax / z__ * bcmis; + +/* Compute B and the rotation matrix */ + + tau = dlapy2_(c__, &z__); + *cs = z__ / tau; + *sn = *c__ / tau; + *b -= *c__; + *c__ = 0.; + } else { + +/* + Complex eigenvalues, or real (almost) equal eigenvalues. + Make diagonal elements equal. 
+*/ + + sigma = *b + *c__; + tau = dlapy2_(&sigma, &temp); + *cs = sqrt((abs(sigma) / tau + 1.) * .5); + *sn = -(p / (tau * *cs)) * d_sign(&c_b2453, &sigma); + +/* + Compute [ AA BB ] = [ A B ] [ CS -SN ] + [ CC DD ] [ C D ] [ SN CS ] +*/ + + aa = *a * *cs + *b * *sn; + bb = -(*a) * *sn + *b * *cs; + cc = *c__ * *cs + *d__ * *sn; + dd = -(*c__) * *sn + *d__ * *cs; + +/* + Compute [ A B ] = [ CS SN ] [ AA BB ] + [ C D ] [-SN CS ] [ CC DD ] +*/ + + *a = aa * *cs + cc * *sn; + *b = bb * *cs + dd * *sn; + *c__ = -aa * *sn + cc * *cs; + *d__ = -bb * *sn + dd * *cs; + + temp = (*a + *d__) * .5; + *a = temp; + *d__ = temp; + + if (*c__ != 0.) { + if (*b != 0.) { + if (d_sign(&c_b2453, b) == d_sign(&c_b2453, c__)) { + +/* Real eigenvalues: reduce to upper triangular form */ + + sab = sqrt((abs(*b))); + sac = sqrt((abs(*c__))); + d__1 = sab * sac; + p = d_sign(&d__1, c__); + tau = 1. / sqrt((d__1 = *b + *c__, abs(d__1))); + *a = temp + p; + *d__ = temp - p; + *b -= *c__; + *c__ = 0.; + cs1 = sab * tau; + sn1 = sac * tau; + temp = *cs * cs1 - *sn * sn1; + *sn = *cs * sn1 + *sn * cs1; + *cs = temp; + } + } else { + *b = -(*c__); + *c__ = 0.; + temp = *cs; + *cs = -(*sn); + *sn = temp; + } + } + } + + } + +L10: + +/* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */ + + *rt1r = *a; + *rt2r = *d__; + if (*c__ == 0.) { + *rt1i = 0.; + *rt2i = 0.; + } else { + *rt1i = sqrt((abs(*b))) * sqrt((abs(*c__))); + *rt2i = -(*rt1i); + } + return 0; + +/* End of DLANV2 */ + +} /* dlanv2_ */ + +doublereal dlapy2_(doublereal *x, doublereal *y) +{ + /* System generated locals */ + doublereal ret_val, d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal w, z__, xabs, yabs; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary
+    overflow.
+
+    Arguments
+    =========
+
+    X       (input) DOUBLE PRECISION
+    Y       (input) DOUBLE PRECISION
+            X and Y specify the values x and y.
+
+    =====================================================================
+*/
+
+
+/*     NOTE(review): f2c-generated from LAPACK dlapy2.f -- do not
+       hand-edit; regenerate from the Fortran source instead. */
+    xabs = abs(*x);
+    yabs = abs(*y);
+/*     w = larger magnitude, z__ = smaller; factoring w out of the root
+       keeps the squared term (z__/w)**2 <= 1, so the squaring can
+       neither overflow nor lose the result to underflow. */
+    w = max(xabs,yabs);
+    z__ = min(xabs,yabs);
+    if (z__ == 0.) {
+	ret_val = w;
+    } else {
+/* Computing 2nd power */
+	d__1 = z__ / w;
+	ret_val = w * sqrt(d__1 * d__1 + 1.);
+    }
+    return ret_val;
+
+/* End of DLAPY2 */
+
+} /* dlapy2_ */
+
+doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__)
+{
+    /* System generated locals */
+    doublereal ret_val, d__1, d__2, d__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal w, xabs, yabs, zabs;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause
+    unnecessary overflow.
+
+    Arguments
+    =========
+
+    X       (input) DOUBLE PRECISION
+    Y       (input) DOUBLE PRECISION
+    Z       (input) DOUBLE PRECISION
+            X, Y and Z specify the values x, y and z.
+
+    =====================================================================
+*/
+
+
+/*     Same scaling idea as DLAPY2, using the largest of the three
+       magnitudes as the scale factor w. */
+    xabs = abs(*x);
+    yabs = abs(*y);
+    zabs = abs(*z__);
+/* Computing MAX */
+    d__1 = max(xabs,yabs);
+    w = max(d__1,zabs);
+    if (w == 0.)
{ + ret_val = 0.; + } else { +/* Computing 2nd power */ + d__1 = xabs / w; +/* Computing 2nd power */ + d__2 = yabs / w; +/* Computing 2nd power */ + d__3 = zabs / w; + ret_val = w * sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3); + } + return ret_val; + +/* End of DLAPY3 */ + +} /* dlapy3_ */ + +/* Subroutine */ int dlarf_(char *side, integer *m, integer *n, doublereal *v, + integer *incv, doublereal *tau, doublereal *c__, integer *ldc, + doublereal *work) +{ + /* System generated locals */ + integer c_dim1, c_offset; + doublereal d__1; + + /* Local variables */ + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLARF applies a real elementary reflector H to a real m by n matrix + C, from either the left or the right. H is represented in the form + + H = I - tau * v * v' + + where tau is a real scalar and v is a real vector. + + If tau = 0, then H is taken to be the unit matrix. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) DOUBLE PRECISION array, dimension + (1 + (M-1)*abs(INCV)) if SIDE = 'L' + or (1 + (N-1)*abs(INCV)) if SIDE = 'R' + The vector v in the representation of H. V is not used if + TAU = 0. + + INCV (input) INTEGER + The increment between elements of v. INCV <> 0. 
+
+    TAU     (input) DOUBLE PRECISION
+            The value tau in the representation of H.
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+            or C * H if SIDE = 'R'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension
+                           (N) if SIDE = 'L'
+                        or (M) if SIDE = 'R'
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments: shift to 1-based indexing as in the
+       original Fortran (standard f2c idiom). */
+    --v;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    if (lsame_(side, "L")) {
+
+/*        Form  H * C  =  (I - tau*v*v') * C  via one GEMV + one rank-1
+          update (GER); tau == 0 means H is the identity, so skip. */
+
+	if (*tau != 0.) {
+
+/*           w := C' * v */
+
+/*           NOTE(review): c_b2453 / c_b2467 are file-level constants;
+             from their use as GEMV alpha/beta they are presumably 1.0
+             and 0.0 -- confirm against the constant table at the top
+             of this file. */
+	    dgemv_("Transpose", m, n, &c_b2453, &c__[c_offset], ldc, &v[1],
+		    incv, &c_b2467, &work[1], &c__1);
+
+/*           C := C - tau * v * w' */
+
+	    d__1 = -(*tau);
+	    dger_(m, n, &d__1, &v[1], incv, &work[1], &c__1, &c__[c_offset],
+		    ldc);
+	}
+    } else {
+
+/*        Form  C * H  symmetrically: w := C * v, then a rank-1 update. */
+
+	if (*tau != 0.)
{ + +/* w := C * v */ + + dgemv_("No transpose", m, n, &c_b2453, &c__[c_offset], ldc, &v[1], + incv, &c_b2467, &work[1], &c__1); + +/* C := C - w * v' */ + + d__1 = -(*tau); + dger_(m, n, &d__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], + ldc); + } + } + return 0; + +/* End of DLARF */ + +} /* dlarf_ */ + +/* Subroutine */ int dlarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, doublereal *v, integer * + ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc, + doublereal *work, integer *ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + static char transt[1]; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLARFB applies a real block reflector H or its transpose H' to a + real m by n matrix C, from either the left or the right. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply H or H' from the Left + = 'R': apply H or H' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply H (No transpose) + = 'T': apply H' (Transpose) + + DIRECT (input) CHARACTER*1 + Indicates how H is formed from a product of elementary + reflectors + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . 
H(2) H(1) (Backward)
+
+    STOREV  (input) CHARACTER*1
+            Indicates how the vectors which define the elementary
+            reflectors are stored:
+            = 'C': Columnwise
+            = 'R': Rowwise
+
+    M       (input) INTEGER
+            The number of rows of the matrix C.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C.
+
+    K       (input) INTEGER
+            The order of the matrix T (= the number of elementary
+            reflectors whose product defines the block reflector).
+
+    V       (input) DOUBLE PRECISION array, dimension
+                                  (LDV,K) if STOREV = 'C'
+                                  (LDV,M) if STOREV = 'R' and SIDE = 'L'
+                                  (LDV,N) if STOREV = 'R' and SIDE = 'R'
+            The matrix V. See further details.
+
+    LDV     (input) INTEGER
+            The leading dimension of the array V.
+            If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
+            if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
+            if STOREV = 'R', LDV >= K.
+
+    T       (input) DOUBLE PRECISION array, dimension (LDT,K)
+            The triangular k by k matrix T in the representation of the
+            block reflector.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T. LDT >= K.
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (LDWORK,K)
+
+    LDWORK  (input) INTEGER
+            The leading dimension of the array WORK.
+            If SIDE = 'L', LDWORK >= max(1,N);
+            if SIDE = 'R', LDWORK >= max(1,M).
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (lsame_(trans, "N")) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (lsame_(storev, "C")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 ) (first K rows) + ( V2 ) + where V1 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, & + c_b2453, &v[v_offset], ldv, &work[work_offset], + ldwork); + if (*m > *k) { + +/* W := W + C2'*V2 */ + + i__1 = *m - *k; + dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b2453, + &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], + ldv, &c_b2453, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b2453, & + t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2 * W' */ + + i__1 = *m - *k; + dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b2589, + &v[*k + 1 + v_dim1], ldv, &work[work_offset], + ldwork, &c_b2453, &c__[*k + 1 + c_dim1], ldc); + } + +/* W := W * V1' */ + + dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b2453, + &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = 
*n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L20: */ + } +/* L30: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, & + c_b2453, &v[v_offset], ldv, &work[work_offset], + ldwork); + if (*n > *k) { + +/* W := W + C2 * V2 */ + + i__1 = *n - *k; + dgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b2453, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b2453, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b2453, & + t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (*n > *k) { + +/* C2 := C2 - W * V2' */ + + i__1 = *n - *k; + dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b2589, + &work[work_offset], ldwork, &v[*k + 1 + v_dim1], + ldv, &c_b2453, &c__[(*k + 1) * c_dim1 + 1], ldc); + } + +/* W := W * V1' */ + + dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b2453, + &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* + Let V = ( V1 ) + ( V2 ) (last K rows) + where V2 is unit upper triangular. 
+*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, & + c_b2453, &v[*m - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (*m > *k) { + +/* W := W + C1'*V1 */ + + i__1 = *m - *k; + dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b2453, + &c__[c_offset], ldc, &v[v_offset], ldv, &c_b2453, + &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b2453, & + t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C1 := C1 - V1 * W' */ + + i__1 = *m - *k; + dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b2589, + &v[v_offset], ldv, &work[work_offset], ldwork, & + c_b2453, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b2453, + &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L80: */ + } +/* L90: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, & + c_b2453, &v[*n - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (*n > *k) { + +/* W := W + C1 * V1 */ + + i__1 = *n - *k; + dgemm_("No transpose", "No transpose", m, k, 
&i__1, & + c_b2453, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b2453, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b2453, & + t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (*n > *k) { + +/* C1 := C1 - W * V1' */ + + i__1 = *n - *k; + dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b2589, + &work[work_offset], ldwork, &v[v_offset], ldv, & + c_b2453, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b2453, + &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (lsame_(storev, "R")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 V2 ) (V1: first K columns) + where V1 is unit upper triangular. 
+*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); +/* L130: */ + } + +/* W := W * V1' */ + + dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b2453, + &v[v_offset], ldv, &work[work_offset], ldwork); + if (*m > *k) { + +/* W := W + C2'*V2' */ + + i__1 = *m - *k; + dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b2453, & + c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b2453, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b2453, & + t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2' * W' */ + + i__1 = *m - *k; + dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b2589, & + v[(*k + 1) * v_dim1 + 1], ldv, &work[work_offset], + ldwork, &c_b2453, &c__[*k + 1 + c_dim1], ldc); + } + +/* W := W * V1 */ + + dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, & + c_b2453, &v[v_offset], ldv, &work[work_offset], + ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L140: */ + } +/* L150: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1' */ + + dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b2453, + &v[v_offset], ldv, &work[work_offset], ldwork); + if (*n > *k) { + +/* W := W + C2 * V2' */ + + i__1 = *n - *k; + dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b2453, + &c__[(*k + 1) * 
c_dim1 + 1], ldc, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b2453, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b2453, & + t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = *n - *k; + dgemm_("No transpose", "No transpose", m, &i__1, k, & + c_b2589, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b2453, &c__[(*k + 1) * + c_dim1 + 1], ldc); + } + +/* W := W * V1 */ + + dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, & + c_b2453, &v[v_offset], ldv, &work[work_offset], + ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* + Let V = ( V1 V2 ) (V2: last K columns) + where V2 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2' */ + + dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b2453, + &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (*m > *k) { + +/* W := W + C1'*V1' */ + + i__1 = *m - *k; + dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b2453, & + c__[c_offset], ldc, &v[v_offset], ldv, &c_b2453, & + work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b2453, & + t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (*m > *k) { + +/* C1 := C1 - V1' * W' */ + + i__1 = *m - *k; + dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b2589, & + v[v_offset], ldv, &work[work_offset], ldwork, & + 
c_b2453, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, & + c_b2453, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L200: */ + } +/* L210: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2' */ + + dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b2453, + &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (*n > *k) { + +/* W := W + C1 * V1' */ + + i__1 = *n - *k; + dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b2453, + &c__[c_offset], ldc, &v[v_offset], ldv, &c_b2453, + &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b2453, & + t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = *n - *k; + dgemm_("No transpose", "No transpose", m, &i__1, k, & + c_b2589, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b2453, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, & + c_b2453, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of DLARFB */ + +} /* dlarfb_ */ + +/* Subroutine */ int dlarfg_(integer *n, doublereal 
*alpha, doublereal *x, + integer *incx, doublereal *tau) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer j, knt; + static doublereal beta; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal xnorm; + + static doublereal safmin, rsafmn; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DLARFG generates a real elementary reflector H of order n, such + that + + H * ( alpha ) = ( beta ), H' * H = I. + ( x ) ( 0 ) + + where alpha and beta are scalars, and x is an (n-1)-element real + vector. H is represented in the form + + H = I - tau * ( 1 ) * ( 1 v' ) , + ( v ) + + where tau is a real scalar and v is a real (n-1)-element + vector. + + If the elements of x are all zero, then tau = 0 and H is taken to be + the unit matrix. + + Otherwise 1 <= tau <= 2. + + Arguments + ========= + + N (input) INTEGER + The order of the elementary reflector. + + ALPHA (input/output) DOUBLE PRECISION + On entry, the value alpha. + On exit, it is overwritten with the value beta. + + X (input/output) DOUBLE PRECISION array, dimension + (1+(N-2)*abs(INCX)) + On entry, the vector x. + On exit, it is overwritten with the vector v. + + INCX (input) INTEGER + The increment between elements of X. INCX > 0. + + TAU (output) DOUBLE PRECISION + The value tau. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 1) { + *tau = 0.; + return 0; + } + + i__1 = *n - 1; + xnorm = dnrm2_(&i__1, &x[1], incx); + + if (xnorm == 0.) 
{ + +/* H = I */ + + *tau = 0.; + } else { + +/* general case */ + + d__1 = dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); + safmin = SAFEMINIMUM / EPSILON; + if (abs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + rsafmn = 1. / safmin; + knt = 0; +L10: + ++knt; + i__1 = *n - 1; + dscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + *alpha *= rsafmn; + if (abs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = dnrm2_(&i__1, &x[1], incx); + d__1 = dlapy2_(alpha, &xnorm); + beta = -d_sign(&d__1, alpha); + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + d__1 = 1. / (*alpha - beta); + dscal_(&i__1, &d__1, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + *alpha = beta; + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + *alpha *= safmin; +/* L20: */ + } + } else { + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + d__1 = 1. / (*alpha - beta); + dscal_(&i__1, &d__1, &x[1], incx); + *alpha = beta; + } + } + + return 0; + +/* End of DLARFG */ + +} /* dlarfg_ */ + +/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer * + k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t, + integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Local variables */ + static integer i__, j; + static doublereal vii; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dtrmv_(char *, + char *, char *, integer *, doublereal *, integer *, doublereal *, + integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLARFT forms the triangular factor T of a real block reflector H + of order n, which is defined as a product of k elementary reflectors. + + If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + + If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + + If STOREV = 'C', the vector which defines the elementary reflector + H(i) is stored in the i-th column of the array V, and + + H = I - V * T * V' + + If STOREV = 'R', the vector which defines the elementary reflector + H(i) is stored in the i-th row of the array V, and + + H = I - V' * T * V + + Arguments + ========= + + DIRECT (input) CHARACTER*1 + Specifies the order in which the elementary reflectors are + multiplied to form the block reflector: + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Specifies how the vectors which define the elementary + reflectors are stored (see also Further Details): + = 'C': columnwise + = 'R': rowwise + + N (input) INTEGER + The order of the block reflector H. N >= 0. + + K (input) INTEGER + The order of the triangular factor T (= the number of + elementary reflectors). K >= 1. + + V (input/output) DOUBLE PRECISION array, dimension + (LDV,K) if STOREV = 'C' + (LDV,N) if STOREV = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i). + + T (output) DOUBLE PRECISION array, dimension (LDT,K) + The k by k triangular factor T of the block reflector. + If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is + lower triangular. The rest of the array is not used. 
+ + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + Further Details + =============== + + The shape of the matrix V and the storage of the vectors which define + the H(i) is best illustrated by the following example with n = 5 and + k = 3. The elements equal to 1 are not stored; the corresponding + array elements are modified but restored on exit. The rest of the + array is not used. + + DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': + + V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) + ( v1 1 ) ( 1 v2 v2 v2 ) + ( v1 v2 1 ) ( 1 v3 v3 ) + ( v1 v2 v3 ) + ( v1 v2 v3 ) + + DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': + + V = ( v1 v2 v3 ) V = ( v1 v1 1 ) + ( v1 v2 v3 ) ( v2 v2 v2 1 ) + ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) + ( 1 v3 ) + ( 1 ) + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + if (tau[i__] == 0.) 
{ + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = 0.; +/* L10: */ + } + } else { + +/* general case */ + + vii = v[i__ + i__ * v_dim1]; + v[i__ + i__ * v_dim1] = 1.; + if (lsame_(storev, "C")) { + +/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + d__1 = -tau[i__]; + dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1], + ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b2467, &t[ + i__ * t_dim1 + 1], &c__1); + } else { + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ + + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + d__1 = -tau[i__]; + dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ * + v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & + c_b2467, &t[i__ * t_dim1 + 1], &c__1); + } + v[i__ + i__ * v_dim1] = vii; + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + t[i__ + i__ * t_dim1] = tau[i__]; + } +/* L20: */ + } + } else { + for (i__ = *k; i__ >= 1; --i__) { + if (tau[i__] == 0.) 
{ + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + t[j + i__ * t_dim1] = 0.; +/* L30: */ + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { + vii = v[*n - *k + i__ + i__ * v_dim1]; + v[*n - *k + i__ + i__ * v_dim1] = 1.; + +/* + T(i+1:k,i) := + - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) +*/ + + i__1 = *n - *k + i__; + i__2 = *k - i__; + d__1 = -tau[i__]; + dgemv_("Transpose", &i__1, &i__2, &d__1, &v[(i__ + 1) + * v_dim1 + 1], ldv, &v[i__ * v_dim1 + 1], & + c__1, &c_b2467, &t[i__ + 1 + i__ * t_dim1], & + c__1); + v[*n - *k + i__ + i__ * v_dim1] = vii; + } else { + vii = v[i__ + (*n - *k + i__) * v_dim1]; + v[i__ + (*n - *k + i__) * v_dim1] = 1.; + +/* + T(i+1:k,i) := + - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' +*/ + + i__1 = *k - i__; + i__2 = *n - *k + i__; + d__1 = -tau[i__]; + dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + + 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & + c_b2467, &t[i__ + 1 + i__ * t_dim1], &c__1); + v[i__ + (*n - *k + i__) * v_dim1] = vii; + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + } + t[i__ + i__ * t_dim1] = tau[i__]; + } +/* L40: */ + } + } + return 0; + +/* End of DLARFT */ + +} /* dlarft_ */ + +/* Subroutine */ int dlarfx_(char *side, integer *m, integer *n, doublereal * + v, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + doublereal d__1; + + /* Local variables */ + static integer j; + static doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, + v6, v7, v8, v9, t10, v10, sum; + extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, 
integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLARFX applies a real elementary reflector H to a real m by n + matrix C, from either the left or the right. H is represented in the + form + + H = I - tau * v * v' + + where tau is a real scalar and v is a real vector. + + If tau = 0, then H is taken to be the unit matrix + + This version uses inline code if H has order < 11. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) DOUBLE PRECISION array, dimension (M) if SIDE = 'L' + or (N) if SIDE = 'R' + The vector v in the representation of H. + + TAU (input) DOUBLE PRECISION + The value tau in the representation of H. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDA >= (1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + WORK is not referenced if H has order < 11. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (*tau == 0.) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form H * C, where H has order m. 
*/ + + switch (*m) { + case 1: goto L10; + case 2: goto L30; + case 3: goto L50; + case 4: goto L70; + case 5: goto L90; + case 6: goto L110; + case 7: goto L130; + case 8: goto L150; + case 9: goto L170; + case 10: goto L190; + } + +/* + Code for general M + + w := C'*v +*/ + + dgemv_("Transpose", m, n, &c_b2453, &c__[c_offset], ldc, &v[1], &c__1, + &c_b2467, &work[1], &c__1); + +/* C := C - tau * v * w' */ + + d__1 = -(*tau); + dger_(m, n, &d__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc) + ; + goto L410; +L10: + +/* Special code for 1 x 1 Householder */ + + t1 = 1. - *tau * v[1] * v[1]; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; +/* L20: */ + } + goto L410; +L30: + +/* Special code for 2 x 2 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; +/* L40: */ + } + goto L410; +L50: + +/* Special code for 3 x 3 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; +/* L60: */ + } + goto L410; +L70: + +/* Special code for 4 x 4 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; +/* L80: */ + } + goto L410; +L90: + +/* Special code for 
5 x 5 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; +/* L100: */ + } + goto L410; +L110: + +/* Special code for 6 x 6 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; +/* L120: */ + } + goto L410; +L130: + +/* Special code for 7 x 7 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * 
t6; + c__[j * c_dim1 + 7] -= sum * t7; +/* L140: */ + } + goto L410; +L150: + +/* Special code for 8 x 8 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; +/* L160: */ + } + goto L410; +L170: + +/* Special code for 9 x 9 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * + c_dim1 + 9]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; +/* L180: */ + } + goto L410; +L190: + +/* Special code for 10 x 10 Householder */ + + v1 = 
v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j * + c_dim1 + 9] + v10 * c__[j * c_dim1 + 10]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; + c__[j * c_dim1 + 8] -= sum * t8; + c__[j * c_dim1 + 9] -= sum * t9; + c__[j * c_dim1 + 10] -= sum * t10; +/* L200: */ + } + goto L410; + } else { + +/* Form C * H, where H has order n. */ + + switch (*n) { + case 1: goto L210; + case 2: goto L230; + case 3: goto L250; + case 4: goto L270; + case 5: goto L290; + case 6: goto L310; + case 7: goto L330; + case 8: goto L350; + case 9: goto L370; + case 10: goto L390; + } + +/* + Code for general N + + w := C * v +*/ + + dgemv_("No transpose", m, n, &c_b2453, &c__[c_offset], ldc, &v[1], & + c__1, &c_b2467, &work[1], &c__1); + +/* C := C - tau * w * v' */ + + d__1 = -(*tau); + dger_(m, n, &d__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc) + ; + goto L410; +L210: + +/* Special code for 1 x 1 Householder */ + + t1 = 1. 
- *tau * v[1] * v[1]; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + c__[j + c_dim1] = t1 * c__[j + c_dim1]; +/* L220: */ + } + goto L410; +L230: + +/* Special code for 2 x 2 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; +/* L240: */ + } + goto L410; +L250: + +/* Special code for 3 x 3 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; +/* L260: */ + } + goto L410; +L270: + +/* Special code for 4 x 4 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; +/* L280: */ + } + goto L410; +L290: + +/* Special code for 5 x 5 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; +/* L300: */ + } + goto L410; +L310: 
+ +/* Special code for 6 x 6 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; +/* L320: */ + } + goto L410; +L330: + +/* Special code for 7 x 7 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; +/* L340: */ + } + goto L410; +L350: + +/* Special code for 8 x 8 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + 
c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; +/* L360: */ + } + goto L410; +L370: + +/* Special code for 9 x 9 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; +/* L380: */ + } + goto L410; +L390: + +/* Special code for 10 x 10 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + v8 = v[8]; + t8 = *tau * v8; + v9 = v[9]; + t9 = *tau * v9; + v10 = v[10]; + t10 = *tau * v10; + i__1 = *m; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 * + c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 * + c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[ + j + c_dim1 * 7] + 
v8 * c__[j + (c_dim1 << 3)] + v9 * c__[ + j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10]; + c__[j + c_dim1] -= sum * t1; + c__[j + (c_dim1 << 1)] -= sum * t2; + c__[j + c_dim1 * 3] -= sum * t3; + c__[j + (c_dim1 << 2)] -= sum * t4; + c__[j + c_dim1 * 5] -= sum * t5; + c__[j + c_dim1 * 6] -= sum * t6; + c__[j + c_dim1 * 7] -= sum * t7; + c__[j + (c_dim1 << 3)] -= sum * t8; + c__[j + c_dim1 * 9] -= sum * t9; + c__[j + c_dim1 * 10] -= sum * t10; +/* L400: */ + } + goto L410; + } +L410: + return 0; + +/* End of DLARFX */ + +} /* dlarfx_ */ + +/* Subroutine */ int dlartg_(doublereal *f, doublereal *g, doublereal *cs, + doublereal *sn, doublereal *r__) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal); + + /* Local variables */ + static integer i__; + static doublereal f1, g1, eps, scale; + static integer count; + static doublereal safmn2, safmx2; + + static doublereal safmin; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DLARTG generate a plane rotation so that + + [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. + [ -SN CS ] [ G ] [ 0 ] + + This is a slower, more accurate version of the BLAS1 routine DROTG, + with the following other differences: + F and G are unchanged on return. + If G=0, then CS=1 and SN=0. + If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any + floating point operations (saves work in DBDSQR when + there are zeros on the diagonal). + + If F exceeds G in magnitude, CS will be positive. + + Arguments + ========= + + F (input) DOUBLE PRECISION + The first component of vector to be rotated. + + G (input) DOUBLE PRECISION + The second component of vector to be rotated. 
+ + CS (output) DOUBLE PRECISION + The cosine of the rotation. + + SN (output) DOUBLE PRECISION + The sine of the rotation. + + R (output) DOUBLE PRECISION + The nonzero component of the rotated vector. + + ===================================================================== +*/ + + + if (first) { + first = FALSE_; + safmin = SAFEMINIMUM; + eps = EPSILON; + d__1 = BASE; + i__1 = (integer) (log(safmin / eps) / log(BASE) / + 2.); + safmn2 = pow_di(&d__1, &i__1); + safmx2 = 1. / safmn2; + } + if (*g == 0.) { + *cs = 1.; + *sn = 0.; + *r__ = *f; + } else if (*f == 0.) { + *cs = 0.; + *sn = 1.; + *r__ = *g; + } else { + f1 = *f; + g1 = *g; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale >= safmx2) { + count = 0; +L10: + ++count; + f1 *= safmn2; + g1 *= safmn2; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale >= safmx2) { + goto L10; + } +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmx2; +/* L20: */ + } + } else if (scale <= safmn2) { + count = 0; +L30: + ++count; + f1 *= safmx2; + g1 *= safmx2; +/* Computing MAX */ + d__1 = abs(f1), d__2 = abs(g1); + scale = max(d__1,d__2); + if (scale <= safmn2) { + goto L30; + } +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + *r__ *= safmn2; +/* L40: */ + } + } else { +/* Computing 2nd power */ + d__1 = f1; +/* Computing 2nd power */ + d__2 = g1; + *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + *cs = f1 / *r__; + *sn = g1 / *r__; + } + if (abs(*f) > abs(*g) && *cs < 0.) 
{ + *cs = -(*cs); + *sn = -(*sn); + *r__ = -(*r__); + } + } + return 0; + +/* End of DLARTG */ + +} /* dlartg_ */ + +/* Subroutine */ int dlas2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax) +{ + /* System generated locals */ + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal c__, fa, ga, ha, as, at, au, fhmn, fhmx; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DLAS2 computes the singular values of the 2-by-2 matrix + [ F G ] + [ 0 H ]. + On return, SSMIN is the smaller singular value and SSMAX is the + larger singular value. + + Arguments + ========= + + F (input) DOUBLE PRECISION + The (1,1) element of the 2-by-2 matrix. + + G (input) DOUBLE PRECISION + The (1,2) element of the 2-by-2 matrix. + + H (input) DOUBLE PRECISION + The (2,2) element of the 2-by-2 matrix. + + SSMIN (output) DOUBLE PRECISION + The smaller singular value. + + SSMAX (output) DOUBLE PRECISION + The larger singular value. + + Further Details + =============== + + Barring over/underflow, all output quantities are correct to within + a few units in the last place (ulps), even in the absence of a guard + digit in addition/subtraction. + + In IEEE arithmetic, the code works correctly if one matrix element is + infinite. + + Overflow will not occur unless the largest singular value itself + overflows, or is within a few ulps of overflow. (On machines with + partial overflow, like the Cray, overflow may occur if the largest + singular value is within a factor of 2 of overflow.) + + Underflow is harmless if underflow is gradual. Otherwise, results + may correspond to a matrix modified by perturbations of size near + the underflow threshold. 
+ + ==================================================================== +*/ + + + fa = abs(*f); + ga = abs(*g); + ha = abs(*h__); + fhmn = min(fa,ha); + fhmx = max(fa,ha); + if (fhmn == 0.) { + *ssmin = 0.; + if (fhmx == 0.) { + *ssmax = ga; + } else { +/* Computing 2nd power */ + d__1 = min(fhmx,ga) / max(fhmx,ga); + *ssmax = max(fhmx,ga) * sqrt(d__1 * d__1 + 1.); + } + } else { + if (ga < fhmx) { + as = fhmn / fhmx + 1.; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + d__1 = ga / fhmx; + au = d__1 * d__1; + c__ = 2. / (sqrt(as * as + au) + sqrt(at * at + au)); + *ssmin = fhmn * c__; + *ssmax = fhmx / c__; + } else { + au = fhmx / ga; + if (au == 0.) { + +/* + Avoid possible harmful underflow if exponent range + asymmetric (true SSMIN may not underflow even if + AU underflows) +*/ + + *ssmin = fhmn * fhmx / ga; + *ssmax = ga; + } else { + as = fhmn / fhmx + 1.; + at = (fhmx - fhmn) / fhmx; +/* Computing 2nd power */ + d__1 = as * au; +/* Computing 2nd power */ + d__2 = at * au; + c__ = 1. / (sqrt(d__1 * d__1 + 1.) + sqrt(d__2 * d__2 + 1.)); + *ssmin = fhmn * c__ * au; + *ssmin += *ssmin; + *ssmax = ga / (c__ + c__); + } + } + } + return 0; + +/* End of DLAS2 */ + +} /* dlas2_ */ + +/* Subroutine */ int dlascl_(char *type__, integer *kl, integer *ku, + doublereal *cfrom, doublereal *cto, integer *m, integer *n, + doublereal *a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + static integer i__, j, k1, k2, k3, k4; + static doublereal mul, cto1; + static logical done; + static doublereal ctoc; + extern logical lsame_(char *, char *); + static integer itype; + static doublereal cfrom1; + + static doublereal cfromc; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal bignum, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLASCL multiplies the M by N real matrix A by the real scalar + CTO/CFROM. This is done without over/underflow as long as the final + result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that + A may be full, upper triangular, lower triangular, upper Hessenberg, + or banded. + + Arguments + ========= + + TYPE (input) CHARACTER*1 + TYPE indices the storage type of the input matrix. + = 'G': A is a full matrix. + = 'L': A is a lower triangular matrix. + = 'U': A is an upper triangular matrix. + = 'H': A is an upper Hessenberg matrix. + = 'B': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the lower + half stored. + = 'Q': A is a symmetric band matrix with lower bandwidth KL + and upper bandwidth KU and with the only the upper + half stored. + = 'Z': A is a band matrix with lower bandwidth KL and upper + bandwidth KU. + + KL (input) INTEGER + The lower bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + KU (input) INTEGER + The upper bandwidth of A. Referenced only if TYPE = 'B', + 'Q' or 'Z'. + + CFROM (input) DOUBLE PRECISION + CTO (input) DOUBLE PRECISION + The matrix A is multiplied by CTO/CFROM. A(I,J) is computed + without over/underflow if the final result CTO*A(I,J)/CFROM + can be represented without over/underflow. CFROM must be + nonzero. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,M) + The matrix to be multiplied by CTO/CFROM. See TYPE for the + storage type. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + INFO (output) INTEGER + 0 - successful exit + <0 - if INFO = -i, the i-th argument had an illegal value. 
+ + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + + if (lsame_(type__, "G")) { + itype = 0; + } else if (lsame_(type__, "L")) { + itype = 1; + } else if (lsame_(type__, "U")) { + itype = 2; + } else if (lsame_(type__, "H")) { + itype = 3; + } else if (lsame_(type__, "B")) { + itype = 4; + } else if (lsame_(type__, "Q")) { + itype = 5; + } else if (lsame_(type__, "Z")) { + itype = 6; + } else { + itype = -1; + } + + if (itype == -1) { + *info = -1; + } else if (*cfrom == 0.) { + *info = -4; + } else if (*m < 0) { + *info = -6; + } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { + *info = -7; + } else if (itype <= 3 && *lda < max(1,*m)) { + *info = -9; + } else if (itype >= 4) { +/* Computing MAX */ + i__1 = *m - 1; + if (*kl < 0 || *kl > max(i__1,0)) { + *info = -2; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = *n - 1; + if (*ku < 0 || *ku > max(i__1,0) || (itype == 4 || itype == 5) && + *kl != *ku) { + *info = -3; + } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < * + ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) { + *info = -9; + } + } + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASCL", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *m == 0) { + return 0; + } + +/* Get machine parameters */ + + smlnum = SAFEMINIMUM; + bignum = 1. / smlnum; + + cfromc = *cfrom; + ctoc = *cto; + +L10: + cfrom1 = cfromc * smlnum; + cto1 = ctoc / bignum; + if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) 
{ + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (abs(cto1) > abs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } + + if (itype == 0) { + +/* Full matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L20: */ + } +/* L30: */ + } + + } else if (itype == 1) { + +/* Lower triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L40: */ + } +/* L50: */ + } + + } else if (itype == 2) { + +/* Upper triangular matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L60: */ + } +/* L70: */ + } + + } else if (itype == 3) { + +/* Upper Hessenberg matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j + 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L80: */ + } +/* L90: */ + } + + } else if (itype == 4) { + +/* Lower half of a symmetric band matrix */ + + k3 = *kl + 1; + k4 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = k3, i__4 = k4 - j; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L100: */ + } +/* L110: */ + } + + } else if (itype == 5) { + +/* Upper half of a symmetric band matrix */ + + k1 = *ku + 2; + k3 = *ku + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__2 = k1 - j; + i__3 = k3; + for (i__ = max(i__2,1); i__ <= i__3; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L120: */ + } +/* L130: */ + } + + } else if (itype == 6) { + +/* Band matrix */ + + k1 = *kl + *ku + 2; + k2 = *kl + 1; + k3 = (*kl << 1) + *ku + 1; + k4 = *kl + *ku + 1 + *m; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MAX */ + i__3 = k1 - j; +/* 
Computing MIN */ + i__4 = k3, i__5 = k4 - j; + i__2 = min(i__4,i__5); + for (i__ = max(i__3,k2); i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] *= mul; +/* L140: */ + } +/* L150: */ + } + + } + + if (! done) { + goto L10; + } + + return 0; + +/* End of DLASCL */ + +} /* dlascl_ */ + +/* Subroutine */ int dlasd0_(integer *n, integer *sqre, doublereal *d__, + doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer * + ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer * + info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, + iwk, lvl, ndb1, nlp1, nrp1; + static doublereal beta; + static integer idxq, nlvl; + static doublereal alpha; + static integer inode, ndiml, idxqc, ndimr, itemp, sqrei; + extern /* Subroutine */ int dlasd1_(integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, + integer *), dlasdq_(char *, integer *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlasdt_(integer *, integer *, + integer *, integer *, integer *, integer *, integer *), xerbla_( + char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + Using a divide and conquer approach, DLASD0 computes the singular + value decomposition (SVD) of a real upper bidiagonal N-by-M + matrix B with diagonal D and offdiagonal E, where M = N + SQRE. + The algorithm computes orthogonal matrices U and VT such that + B = U * S * VT. 
The singular values S are overwritten on D. + + A related subroutine, DLASDA, computes only the singular values, + and optionally, the singular vectors in compact form. + + Arguments + ========= + + N (input) INTEGER + On entry, the row dimension of the upper bidiagonal matrix. + This is also the dimension of the main diagonal array D. + + SQRE (input) INTEGER + Specifies the column dimension of the bidiagonal matrix. + = 0: The bidiagonal matrix has column dimension M = N; + = 1: The bidiagonal matrix has column dimension M = N+1; + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry D contains the main diagonal of the bidiagonal + matrix. + On exit D, if INFO = 0, contains its singular values. + + E (input) DOUBLE PRECISION array, dimension (M-1) + Contains the subdiagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + U (output) DOUBLE PRECISION array, dimension at least (LDQ, N) + On exit, U contains the left singular vectors. + + LDU (input) INTEGER + On entry, leading dimension of U. + + VT (output) DOUBLE PRECISION array, dimension at least (LDVT, M) + On exit, VT' contains the right singular vectors. + + LDVT (input) INTEGER + On entry, leading dimension of VT. + + SMLSIZ (input) INTEGER + On entry, maximum size of the subproblems at the + bottom of the computation tree. + + IWORK INTEGER work array. + Dimension must be at least (8 * N) + + WORK DOUBLE PRECISION work array. + Dimension must be at least (3 * M**2 + 2 * M) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --iwork; + --work; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } + + m = *n + *sqre; + + if (*ldu < *n) { + *info = -6; + } else if (*ldvt < m) { + *info = -8; + } else if (*smlsiz < 3) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD0", &i__1); + return 0; + } + +/* If the input matrix is too small, call DLASDQ to find the SVD. */ + + if (*n <= *smlsiz) { + dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], + ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); + return 0; + } + +/* Set up the computation tree. */ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + For the nodes on bottom level of the tree, solve + their subproblems by DLASDQ. 
+*/ + + ndb1 = (nd + 1) / 2; + ncc = 0; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nrp1 = nr + 1; + nlf = ic - nl; + nrf = ic + 1; + sqrei = 1; + dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ + nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[ + nlf + nlf * u_dim1], ldu, &work[1], info); + if (*info != 0) { + return 0; + } + itemp = idxq + nlf - 2; + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j] = j; +/* L10: */ + } + if (i__ == nd) { + sqrei = *sqre; + } else { + sqrei = 1; + } + nrp1 = nr + sqrei; + dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ + nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[ + nrf + nrf * u_dim1], ldu, &work[1], info); + if (*info != 0) { + return 0; + } + itemp = idxq + ic; + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[itemp + j - 1] = j; +/* L20: */ + } +/* L30: */ + } + +/* Now conquer each subproblem bottom-up. */ + + for (lvl = nlvl; lvl >= 1; --lvl) { + +/* + Find the first node LF and last node LL on the + current level LVL. 
+*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + if (*sqre == 0 && i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + idxqc = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * + u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[ + idxqc], &iwork[iwk], &work[1], info); + if (*info != 0) { + return 0; + } +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of DLASD0 */ + +} /* dlasd0_ */ + +/* Subroutine */ int dlasd1_(integer *nl, integer *nr, integer *sqre, + doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u, + integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer * + iwork, doublereal *work, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, + idxc, idxp, ldvt2; + extern /* Subroutine */ int dlasd2_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *), dlasd3_( + integer *, integer *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *), + dlascl_(char *, integer *, integer *, doublereal *, doublereal *, + integer *, integer *, doublereal *, integer *, integer *), + dlamrg_(integer *, integer *, doublereal *, integer *, integer 
*, + integer *); + static integer isigma; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal orgnrm; + static integer coltyp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, + where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. + + A related subroutine DLASD7 handles the case in which the singular + values (and the singular vectors in factored form) are desired. + + DLASD1 computes the SVD as follows: + + ( D1(in) 0 0 0 ) + B = U(in) * ( Z1' a Z2' b ) * VT(in) + ( 0 0 D2(in) 0 ) + + = U(out) * ( D(out) 0) * VT(out) + + where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M + with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + elsewhere; and the entry b is empty if SQRE = 0. + + The left singular vectors of the original matrix are stored in U, and + the transpose of the right singular vectors are stored in VT, and the + singular values are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple singular values or when there are zeros in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLASD2. + + The second stage consists of calculating the updated + singular values. This is done by finding the square roots of the + roots of the secular equation via the routine DLASD4 (as called + by DLASD3). This routine also calculates the singular vectors of + the current problem. + + The final stage consists of computing the updated singular vectors + directly using the updated singular values. 
The singular vectors + for the current problem are multiplied with the singular vectors + from the overall problem. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + D (input/output) DOUBLE PRECISION array, + dimension (N = NL+NR+1). + On entry D(1:NL,1:NL) contains the singular values of the + upper block; and D(NL+2:N) contains the singular values of + the lower block. On exit D(1:N) contains the singular values + of the modified matrix. + + ALPHA (input) DOUBLE PRECISION + Contains the diagonal element associated with the added row. + + BETA (input) DOUBLE PRECISION + Contains the off-diagonal element associated with the added + row. + + U (input/output) DOUBLE PRECISION array, dimension(LDU,N) + On entry U(1:NL, 1:NL) contains the left singular vectors of + the upper block; U(NL+2:N, NL+2:N) contains the left singular + vectors of the lower block. On exit U contains the left + singular vectors of the bidiagonal matrix. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= max( 1, N ). + + VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) + where M = N + SQRE. + On entry VT(1:NL+1, 1:NL+1)' contains the right singular + vectors of the upper block; VT(NL+2:M, NL+2:M)' contains + the right singular vectors of the lower block. On exit + VT' contains the right singular vectors of the + bidiagonal matrix. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= max( 1, M ). + + IDXQ (output) INTEGER array, dimension(N) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, i.e. 
+ D( IDXQ( I = 1, N ) ) will be in ascending order. + + IWORK (workspace) INTEGER array, dimension( 4 * N ) + + WORK (workspace) DOUBLE PRECISION array, dimension( 3*M**2 + 2*M ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --idxq; + --iwork; + --work; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre < 0 || *sqre > 1) { + *info = -3; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD1", &i__1); + return 0; + } + + n = *nl + *nr + 1; + m = n + *sqre; + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in DLASD2 and DLASD3. +*/ + + ldu2 = n; + ldvt2 = m; + + iz = 1; + isigma = iz + m; + iu2 = isigma + n; + ivt2 = iu2 + ldu2 * n; + iq = ivt2 + ldvt2 * m; + + idx = 1; + idxc = idx + n; + coltyp = idxc + n; + idxp = coltyp + n; + +/* + Scale. + + Computing MAX +*/ + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1,d__2); + d__[*nl + 1] = 0.; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); + } +/* L10: */ + } + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, &n, &c__1, &d__[1], &n, + info); + *alpha /= orgnrm; + *beta /= orgnrm; + +/* Deflate singular values. 
*/ + + dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], + ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, & + work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], & + idxq[1], &iwork[coltyp], info); + +/* Solve Secular Equation and update singular vectors. */ + + ldq = k; + dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ + u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ + ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info); + if (*info != 0) { + return 0; + } + +/* Unscale. */ + + dlascl_("G", &c__0, &c__0, &c_b2453, &orgnrm, &n, &c__1, &d__[1], &n, + info); + +/* Prepare the IDXQ sorting permutation. */ + + n1 = k; + n2 = n - k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + + return 0; + +/* End of DLASD1 */ + +} /* dlasd1_ */ + +/* Subroutine */ int dlasd2_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal * + beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, + doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, + integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer * + idxq, integer *coltyp, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, + vt2_dim1, vt2_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal c__; + static integer i__, j, m, n; + static doublereal s; + static integer k2; + static doublereal z1; + static integer ct, jp; + static doublereal eps, tau, tol; + static integer psm[4], nlp1, nlp2, idxi, idxj; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer ctot[4], idxjp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer jprev; + + extern /* Subroutine */ int dlamrg_(integer *, 
integer *, doublereal *, + integer *, integer *, integer *), dlacpy_(char *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, + integer *); + static doublereal hlftol; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASD2 merges the two sets of singular values together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + singular values are close together or if there is a tiny entry in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + DLASD2 is called from DLASD1. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + D (input/output) DOUBLE PRECISION array, dimension(N) + On entry D contains the singular values of the two submatrices + to be combined. On exit D contains the trailing (N-K) updated + singular values (those which were deflated) sorted into + increasing order. + + ALPHA (input) DOUBLE PRECISION + Contains the diagonal element associated with the added row. + + BETA (input) DOUBLE PRECISION + Contains the off-diagonal element associated with the added + row. 
+ + U (input/output) DOUBLE PRECISION array, dimension(LDU,N) + On entry U contains the left singular vectors of two + submatrices in the two square blocks with corners at (1,1), + (NL, NL), and (NL+2, NL+2), (N,N). + On exit U contains the trailing (N-K) updated left singular + vectors (those which were deflated) in its last N-K columns. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= N. + + Z (output) DOUBLE PRECISION array, dimension(N) + On exit Z contains the updating row vector in the secular + equation. + + DSIGMA (output) DOUBLE PRECISION array, dimension (N) + Contains a copy of the diagonal elements (K-1 singular values + and one zero) in the secular equation. + + U2 (output) DOUBLE PRECISION array, dimension(LDU2,N) + Contains a copy of the first K-1 left singular vectors which + will be used by DLASD3 in a matrix multiply (DGEMM) to solve + for the new left singular vectors. U2 is arranged into four + blocks. The first block contains a column with 1 at NL+1 and + zero everywhere else; the second block contains non-zero + entries only at and above NL; the third contains non-zero + entries only below NL+1; and the fourth is dense. + + LDU2 (input) INTEGER + The leading dimension of the array U2. LDU2 >= N. + + VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) + On entry VT' contains the right singular vectors of two + submatrices in the two square blocks with corners at (1,1), + (NL+1, NL+1), and (NL+2, NL+2), (M,M). + On exit VT' contains the trailing (N-K) updated right singular + vectors (those which were deflated) in its last N-K columns. + In case SQRE =1, the last row of VT spans the right null + space. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= M. + + VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N) + VT2' contains a copy of the first K right singular vectors + which will be used by DLASD3 in a matrix multiply (DGEMM) to + solve for the new right singular vectors. 
VT2 is arranged into + three blocks. The first block contains a row that corresponds + to the special 0 diagonal element in SIGMA; the second block + contains non-zeros only at and before NL +1; the third block + contains non-zeros only at and after NL +2. + + LDVT2 (input) INTEGER + The leading dimension of the array VT2. LDVT2 >= M. + + IDXP (workspace) INTEGER array, dimension(N) + This will contain the permutation used to place deflated + values of D at the end of the array. On output IDXP(2:K) + points to the nondeflated D-values and IDXP(K+1:N) + points to the deflated singular values. + + IDX (workspace) INTEGER array, dimension(N) + This will contain the permutation used to sort the contents of + D into ascending order. + + IDXC (output) INTEGER array, dimension(N) + This will contain the permutation used to arrange the columns + of the deflated U matrix into three groups: the first group + contains non-zero entries only at and above NL, the second + contains non-zero entries only below NL+2, and the third is + dense. + + COLTYP (workspace/output) INTEGER array, dimension(N) + As workspace, this will contain a label which will indicate + which of the following types a column in the U2 matrix or a + row in the VT2 matrix is: + 1 : non-zero in the upper half only + 2 : non-zero in the lower half only + 3 : dense + 4 : deflated + + On exit, it is an array of dimension 4, with COLTYP(I) being + the dimension of the I-th type columns. + + IDXQ (input) INTEGER array, dimension(N) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that entries in + the first hlaf of this permutation must first be moved one + position backward; and entries in the second half + must first have NL+1 added to their values. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. 
+ + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --z__; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --dsigma; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxp; + --idx; + --idxc; + --idxq; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + + n = *nl + *nr + 1; + m = n + *sqre; + + if (*ldu < n) { + *info = -10; + } else if (*ldvt < m) { + *info = -12; + } else if (*ldu2 < n) { + *info = -15; + } else if (*ldvt2 < m) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD2", &i__1); + return 0; + } + + nlp1 = *nl + 1; + nlp2 = *nl + 2; + +/* + Generate the first part of the vector Z; and move the singular + values in the first part of D one position backward. +*/ + + z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1]; + z__[1] = z1; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; +/* L10: */ + } + +/* Generate the second part of the vector Z. */ + + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1]; +/* L20: */ + } + +/* Initialize some reference arrays. 
*/ + + i__1 = nlp1; + for (i__ = 2; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L30: */ + } + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + coltyp[i__] = 2; +/* L40: */ + } + +/* Sort the singular values into increasing order */ + + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; +/* L50: */ + } + +/* + DSIGMA, IDXC, IDXC, and the first column of U2 + are used as storage space. +*/ + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + u2[i__ + u2_dim1] = z__[idxq[i__]]; + idxc[i__] = coltyp[idxq[i__]]; +/* L60: */ + } + + dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = u2[idxi + u2_dim1]; + coltyp[i__] = idxc[idxi]; +/* L70: */ + } + +/* Calculate the allowable deflation tolerance */ + + eps = EPSILON; +/* Computing MAX */ + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1,d__2); +/* Computing MAX */ + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 8. * max(d__2,tol); + +/* + There are 2 kinds of deflation -- first a value in the z-vector + is small, second two (or more) singular values are very close + together (their difference is small). + + If the value in the z-vector is small, we simply permute the + array so that the corresponding singular value is moved to the + end. + + If two values in the D-vector are close, we perform a two-sided + rotation designed to make one of the corresponding z-vector + entries zero, and then permute the array so that the deflated + singular value is moved to the end. + + If there are multiple singular values then the problem deflates. + Here the number of equal singular values are found. As each equal + singular value is found, an elementary reflector is computed to + rotate the corresponding singular subspace so that the + corresponding components of Z are zero in this new basis. 
+*/ + + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + coltyp[j] = 4; + if (j == n) { + goto L120; + } + } else { + jprev = j; + goto L90; + } +/* L80: */ + } +L90: + j = jprev; +L100: + ++j; + if (j > n) { + goto L110; + } + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + coltyp[j] = 4; + } else { + +/* Check if singular values are close enough to allow deflation. */ + + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + s = z__[jprev]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(&c__, &s); + c__ /= tau; + s = -s / tau; + z__[j] = tau; + z__[jprev] = 0.; + +/* + Apply back the Givens rotation to the left and right + singular vector matrices. +*/ + + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & + c__1, &c__, &s); + drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & + c__, &s); + if (coltyp[j] != coltyp[jprev]) { + coltyp[j] = 3; + } + coltyp[jprev] = 4; + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L100; +L110: + +/* Record the last singular value. */ + + ++(*k); + u2[*k + u2_dim1] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + +L120: + +/* + Count up the total number of the various types of columns, then + form a permutation which positions the four column types into + four groups of uniform structure (although one or more of these + groups may be empty). 
+*/ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L130: */ + } + i__1 = n; + for (j = 2; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L140: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 2; + psm[1] = ctot[0] + 2; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + +/* + Fill out the IDXC array so that the permutation which it induces + will place all type-1 columns first, all type-2 columns next, + then all type-3's, and finally all type-4's, starting from the + second column. This applies similarly to the rows of VT. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + ct = coltyp[jp]; + idxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L150: */ + } + +/* + Sort the singular values and corresponding singular vectors into + DSIGMA, U2, and VT2 respectively. The singular values/vectors + which were not deflated go into the first K slots of DSIGMA, U2, + and VT2 respectively, while those which were deflated go into the + last N - K slots, except that the first column/row will be treated + separately. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + idxj = idxq[idx[idxp[idxc[j]]] + 1]; + if (idxj <= nlp1) { + --idxj; + } + dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); + dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); +/* L160: */ + } + +/* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ + + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = dlapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + c__ = 1.; + s = 0.; + z__[1] = tol; + } else { + c__ = z1 / z__[1]; + s = z__[m] / z__[1]; + } + } else { + if (abs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + +/* Move the rest of the updating row to Z. 
*/ + + i__1 = *k - 1; + dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); + +/* + Determine the first column of U2, the first row of VT2 and the + last row of VT. +*/ + + dlaset_("A", &n, &c__1, &c_b2467, &c_b2467, &u2[u2_offset], ldu2); + u2[nlp1 + u2_dim1] = 1.; + if (m > n) { + i__1 = nlp1; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1]; + vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1]; +/* L170: */ + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1]; + vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1]; +/* L180: */ + } + } else { + dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); + } + if (m > n) { + dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); + } + +/* + The deflated singular values and their corresponding vectors go + into the back of D, U, and V respectively. +*/ + + if (n > *k) { + i__1 = n - *k; + dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = n - *k; + dlacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) + * u_dim1 + 1], ldu); + i__1 = n - *k; + dlacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + + vt_dim1], ldvt); + } + +/* Copy CTOT into COLTYP for referencing in DLASD3. 
*/ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L190: */ + } + + return 0; + +/* End of DLASD2 */ + +} /* dlasd2_ */ + +/* Subroutine */ int dlasd3_(integer *nl, integer *nr, integer *sqre, integer + *k, doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma, + doublereal *u, integer *ldu, doublereal *u2, integer *ldu2, + doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, + integer *idxc, integer *ctot, doublereal *z__, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, + vt_offset, vt2_dim1, vt2_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer i__, j, m, n, jc; + static doublereal rho; + static integer nlp1, nlp2, nrp1; + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer ctemp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer ktemp; + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. 
of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASD3 finds all the square roots of the roots of the secular + equation, as defined by the values in D and Z. It makes the + appropriate calls to DLASD4 and then updates the singular + vectors by matrix multiplication. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + DLASD3 is called from DLASD1. + + Arguments + ========= + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (input) INTEGER + The size of the secular equation, 1 =< K = < N. + + D (output) DOUBLE PRECISION array, dimension(K) + On exit the square roots of the roots of the secular equation, + in ascending order. + + Q (workspace) DOUBLE PRECISION array, + dimension at least (LDQ,K). + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= K. + + DSIGMA (input) DOUBLE PRECISION array, dimension(K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. + + U (input) DOUBLE PRECISION array, dimension (LDU, N) + The last N - K columns of this matrix contain the deflated + left singular vectors. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= N. 
+ + U2 (input) DOUBLE PRECISION array, dimension (LDU2, N) + The first K columns of this matrix contain the non-deflated + left singular vectors for the split problem. + + LDU2 (input) INTEGER + The leading dimension of the array U2. LDU2 >= N. + + VT (input) DOUBLE PRECISION array, dimension (LDVT, M) + The last M - K columns of VT' contain the deflated + right singular vectors. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= N. + + VT2 (input) DOUBLE PRECISION array, dimension (LDVT2, N) + The first K columns of VT2' contain the non-deflated + right singular vectors for the split problem. + + LDVT2 (input) INTEGER + The leading dimension of the array VT2. LDVT2 >= N. + + IDXC (input) INTEGER array, dimension ( N ) + The permutation used to arrange the columns of U (and rows of + VT) into three groups: the first group contains non-zero + entries only at and above (or before) NL +1; the second + contains non-zero entries only at and below (or after) NL+2; + and the third is dense. The first column of U and the row of + VT are treated separately, however. + + The rows of the singular vectors found by DLASD4 + must be likewise permuted before the matrix multiplies can + take place. + + CTOT (input) INTEGER array, dimension ( 4 ) + A count of the total number of the various types of columns + in U (or rows in VT), as described in IDXC. The fourth column + type is any column which has been deflated. + + Z (input) DOUBLE PRECISION array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating row vector. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. 
+ > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dsigma; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + u2_dim1 = *ldu2; + u2_offset = 1 + u2_dim1; + u2 -= u2_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + vt2_dim1 = *ldvt2; + vt2_offset = 1 + vt2_dim1; + vt2 -= vt2_offset; + --idxc; + --ctot; + --z__; + + /* Function Body */ + *info = 0; + + if (*nl < 1) { + *info = -1; + } else if (*nr < 1) { + *info = -2; + } else if (*sqre != 1 && *sqre != 0) { + *info = -3; + } + + n = *nl + *nr + 1; + m = n + *sqre; + nlp1 = *nl + 1; + nlp2 = *nl + 2; + + if (*k < 1 || *k > n) { + *info = -4; + } else if (*ldq < *k) { + *info = -7; + } else if (*ldu < n) { + *info = -10; + } else if (*ldu2 < n) { + *info = -12; + } else if (*ldvt < m) { + *info = -14; + } else if (*ldvt2 < m) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD3", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 1) { + d__[1] = abs(z__[1]); + dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); + if (z__[1] > 0.) { + dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + } else { + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + u[i__ + u_dim1] = -u2[i__ + u2_dim1]; +/* L10: */ + } + } + return 0; + } + +/* + Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). 
+ The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), + which on any of these machines zeros out the bottommost + bit of DSIGMA(I) if it is 1; this makes the subsequent + subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DSIGMA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DSIGMA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; +/* L20: */ + } + +/* Keep a copy of Z. */ + + dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); + +/* Normalize Z. */ + + rho = dnrm2_(k, &z__[1], &c__1); + dlascl_("G", &c__0, &c__0, &rho, &c_b2453, k, &c__1, &z__[1], k, info); + rho *= rho; + +/* Find the new singular values. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], + &vt[j * vt_dim1 + 1], info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + return 0; + } +/* L30: */ + } + +/* Compute updated Z. 
*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ + i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]); +/* L40: */ + } + i__2 = *k - 1; + for (j = i__; j <= i__2; ++j) { + z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ + i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); +/* L50: */ + } + d__2 = sqrt((d__1 = z__[i__], abs(d__1))); + z__[i__] = d_sign(&d__2, &q[i__ + q_dim1]); +/* L60: */ + } + +/* + Compute left singular vectors of the modified diagonal matrix, + and store related information for the right singular vectors. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * + vt_dim1 + 1]; + u[i__ * u_dim1 + 1] = -1.; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ + * vt_dim1]; + u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; +/* L70: */ + } + temp = dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1); + q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; +/* L80: */ + } +/* L90: */ + } + +/* Update the left singular vector matrix. 
*/ + + if (*k == 2) { + dgemm_("N", "N", &n, k, k, &c_b2453, &u2[u2_offset], ldu2, &q[ + q_offset], ldq, &c_b2467, &u[u_offset], ldu); + goto L100; + } + if (ctot[1] > 0) { + dgemm_("N", "N", nl, k, &ctot[1], &c_b2453, &u2[(u2_dim1 << 1) + 1], + ldu2, &q[q_dim1 + 2], ldq, &c_b2467, &u[u_dim1 + 1], ldu); + if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + dgemm_("N", "N", nl, k, &ctot[3], &c_b2453, &u2[ktemp * u2_dim1 + + 1], ldu2, &q[ktemp + q_dim1], ldq, &c_b2453, &u[u_dim1 + + 1], ldu); + } + } else if (ctot[3] > 0) { + ktemp = ctot[1] + 2 + ctot[2]; + dgemm_("N", "N", nl, k, &ctot[3], &c_b2453, &u2[ktemp * u2_dim1 + 1], + ldu2, &q[ktemp + q_dim1], ldq, &c_b2467, &u[u_dim1 + 1], ldu); + } else { + dlacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); + } + dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); + ktemp = ctot[1] + 2; + ctemp = ctot[2] + ctot[3]; + dgemm_("N", "N", nr, k, &ctemp, &c_b2453, &u2[nlp2 + ktemp * u2_dim1], + ldu2, &q[ktemp + q_dim1], ldq, &c_b2467, &u[nlp2 + u_dim1], ldu); + +/* Generate the right singular vectors. */ + +L100: + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); + q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; + i__2 = *k; + for (j = 2; j <= i__2; ++j) { + jc = idxc[j]; + q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; +/* L110: */ + } +/* L120: */ + } + +/* Update the right singular vector matrix. 
*/ + + if (*k == 2) { + dgemm_("N", "N", k, &m, k, &c_b2453, &q[q_offset], ldq, &vt2[ + vt2_offset], ldvt2, &c_b2467, &vt[vt_offset], ldvt); + return 0; + } + ktemp = ctot[1] + 1; + dgemm_("N", "N", k, &nlp1, &ktemp, &c_b2453, &q[q_dim1 + 1], ldq, &vt2[ + vt2_dim1 + 1], ldvt2, &c_b2467, &vt[vt_dim1 + 1], ldvt); + ktemp = ctot[1] + 2 + ctot[2]; + if (ktemp <= *ldvt2) { + dgemm_("N", "N", k, &nlp1, &ctot[3], &c_b2453, &q[ktemp * q_dim1 + 1], + ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b2453, &vt[vt_dim1 + + 1], ldvt); + } + + ktemp = ctot[1] + 1; + nrp1 = *nr + *sqre; + if (ktemp > 1) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; +/* L130: */ + } + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; +/* L140: */ + } + } + ctemp = ctot[2] + 1 + ctot[3]; + dgemm_("N", "N", k, &nrp1, &ctemp, &c_b2453, &q[ktemp * q_dim1 + 1], ldq, + &vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b2467, &vt[nlp2 * + vt_dim1 + 1], ldvt); + + return 0; + +/* End of DLASD3 */ + +} /* dlasd3_ */ + +/* Subroutine */ int dlasd4_(integer *n, integer *i__, doublereal *d__, + doublereal *z__, doublereal *delta, doublereal *rho, doublereal * + sigma, doublereal *work, integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal a, b, c__; + static integer j; + static doublereal w, dd[3]; + static integer ii; + static doublereal dw, zz[3]; + static integer ip1; + static doublereal eta, phi, eps, tau, psi; + static integer iim1, iip1; + static doublereal dphi, dpsi; + static integer iter; + static doublereal temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, + dtiip; + static integer niter; + static doublereal dtisq; + static logical swtch; + static doublereal dtnsq; + extern /* Subroutine */ int dlaed6_(integer *, logical *, doublereal *, + doublereal *, doublereal *, 
doublereal *, doublereal *, integer *) + , dlasd5_(integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + static doublereal delsq2, dtnsq1; + static logical swtch3; + + static logical orgati; + static doublereal erretm, dtipsq, rhoinv; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + October 31, 1999 + + + Purpose + ======= + + This subroutine computes the square root of the I-th updated + eigenvalue of a positive symmetric rank-one modification to + a positive diagonal matrix whose entries are given as the squares + of the corresponding entries in the array d, and that + + 0 <= D(i) < D(j) for i < j + + and that RHO > 0. This is arranged by the calling routine, and is + no loss in generality. The rank-one modified system is thus + + diag( D ) * diag( D ) + RHO * Z * Z_transpose. + + where we assume the Euclidean norm of Z is 1. + + The method consists of approximating the rational functions in the + secular equation by simpler interpolating rational functions. + + Arguments + ========= + + N (input) INTEGER + The length of all arrays. + + I (input) INTEGER + The index of the eigenvalue to be computed. 1 <= I <= N. + + D (input) DOUBLE PRECISION array, dimension ( N ) + The original eigenvalues. It is assumed that they are in + order, 0 <= D(I) < D(J) for I < J. + + Z (input) DOUBLE PRECISION array, dimension ( N ) + The components of the updating vector. + + DELTA (output) DOUBLE PRECISION array, dimension ( N ) + If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th + component. If N = 1, then DELTA(1) = 1. The vector DELTA + contains the information necessary to construct the + (singular) eigenvectors. + + RHO (input) DOUBLE PRECISION + The scalar in the symmetric updating formula. + + SIGMA (output) DOUBLE PRECISION + The computed lambda_I, the I-th updated eigenvalue. 
+ + WORK (workspace) DOUBLE PRECISION array, dimension ( N ) + If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th + component. If N = 1, then WORK( 1 ) = 1. + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, the updating process failed. + + Internal Parameters + =================== + + Logical variable ORGATI (origin-at-i?) is used for distinguishing + whether D(i) or D(i+1) is treated as the origin. + + ORGATI = .true. origin at i + ORGATI = .false. origin at i+1 + + Logical variable SWTCH3 (switch-for-3-poles?) is for noting + if we are working with THREE poles! + + MAXIT is the maximum number of iterations allowed for each + eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Since this routine is called in an inner loop, we do no argument + checking. + + Quick return for N=1 and 2. +*/ + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]); + delta[1] = 1.; + work[1] = 1.; + return 0; + } + if (*n == 2) { + dlasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); + return 0; + } + +/* Compute machine epsilon */ + + eps = EPSILON; + rhoinv = 1. 
/ *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + temp = *rho / 2.; + +/* + If ||Z||_2 is not one, then TEMP should be set to + RHO * ||Z||_2^2 / TWO +*/ + + temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp)); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*n] + temp1; + delta[j] = d__[j] - d__[*n] - temp1; +/* L10: */ + } + + psi = 0.; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (delta[j] * work[j]); +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[* + n] / (delta[*n] * work[*n]); + + if (w <= 0.) { + temp1 = sqrt(d__[*n] * d__[*n] + *rho); + temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[* + n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * + z__[*n] / *rho; + +/* + The following TAU is to approximate + SIGMA_n^2 - D( N )*D( N ) +*/ + + if (c__ <= temp) { + tau = *rho; + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[* + n]; + b = z__[*n] * z__[*n] * delsq; + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + } + } + +/* + It can be proved that + D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO +*/ + + } else { + delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]); + a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * delsq; + +/* + The following TAU is to approximate + SIGMA_n^2 - D( N )*D( N ) +*/ + + if (a < 0.) { + tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4. 
* c__)) / (c__ * 2.); + } + +/* + It can be proved that + D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 +*/ + + } + +/* The following ETA is to approximate SIGMA_n - D( N ) */ + + eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau)); + + *sigma = d__[*n] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - eta; + work[j] = d__[j] + d__[*i__] + eta; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (delta[j] * work[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (delta[*n] * work[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + ++niter; + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi); + b = dtnsq * dtnsq1 * w; + if (c__ < 0.) { + c__ = abs(c__); + } + if (c__ == 0.) { + eta = *rho - *sigma * *sigma; + } else if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ + * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) + ); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.) 
{ + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp > *rho) { + eta = *rho + dtnsq; + } + + tau += eta; + eta /= *sigma + sqrt(eta + *sigma * *sigma); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L50: */ + } + + *sigma += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (work[*n] * delta[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi + + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + dtnsq1 = work[*n - 1] * delta[*n - 1]; + dtnsq = work[*n] * delta[*n]; + c__ = w - dtnsq1 * dpsi - dtnsq * dphi; + a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi); + b = dtnsq1 * dtnsq * w; + if (a >= 0.) { + eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( + d__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta > 0.) { + eta = -w / (dpsi + dphi); + } + temp = eta - dtnsq; + if (temp <= 0.) 
{ + eta /= 2.; + } + + tau += eta; + eta /= *sigma + sqrt(eta + *sigma * *sigma); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; + work[j] += eta; +/* L70: */ + } + + *sigma += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / (work[*n] * delta[*n]); + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + goto L240; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]); + delsq2 = delsq / 2.; + temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2)); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*i__] + temp; + delta[j] = d__[j] - d__[*i__] - temp; +/* L100: */ + } + + psi = 0.; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L110: */ + } + + phi = 0.; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / (work[j] * delta[j]); +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[ + ip1] * z__[ip1] / (work[ip1] * delta[ip1]); + + if (w > 0.) { + +/* + d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 + + We choose d(i) as origin. +*/ + + orgati = TRUE_; + sg2lb = 0.; + sg2ub = delsq2; + a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * delsq; + if (a > 0.) { + tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. 
* c__, abs( + d__1)))); + } else { + tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } + +/* + TAU now is an estimation of SIGMA^2 - D( I )^2. The + following, however, is the corresponding estimation of + SIGMA - D( I ). +*/ + + eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau)); + } else { + +/* + (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 + + We choose d(i+1) as origin. +*/ + + orgati = FALSE_; + sg2lb = -delsq2; + sg2ub = 0.; + a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * delsq; + if (a < 0.) { + tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( + d__1)))); + } else { + tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / + (c__ * 2.); + } + +/* + TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The + following, however, is the corresponding estimation of + SIGMA - D( IP1 ). +*/ + + eta = tau / (d__[ip1] + sqrt((d__1 = d__[ip1] * d__[ip1] + tau, + abs(d__1)))); + } + + if (orgati) { + ii = *i__; + *sigma = d__[*i__] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[*i__] + eta; + delta[j] = d__[j] - d__[*i__] - eta; +/* L130: */ + } + } else { + ii = *i__ + 1; + *sigma = d__[ip1] + eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] = d__[j] + d__[ip1] + eta; + delta[j] = d__[j] - d__[ip1] - eta; +/* L140: */ + } + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* + W is the value of 
the secular function with + its ii-th element removed. +*/ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.) { + swtch3 = TRUE_; + } + } else { + if (w > 0.) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + + if (w <= 0.) { + sg2lb = max(sg2lb,tau); + } else { + sg2ub = min(sg2ub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.) { + if (a == 0.) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi + + dphi); + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( + c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. 
* c__, abs( + d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) * + (d__[iim1] + d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) * + (d__[iim1] + d__[iip1]) * temp1; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + dd[0] = dtiim; + dd[1] = delta[ii] * work[ii]; + dd[2] = dtiip; + dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + if (*info != 0) { + goto L240; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.) { + eta = -w / dw; + } + if (orgati) { + temp1 = work[*i__] * delta[*i__]; + temp = eta - temp1; + } else { + temp1 = work[ip1] * delta[ip1]; + temp = eta - temp1; + } + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.) 
{ + eta = (sg2ub - tau) / 2.; + } else { + eta = (sg2lb - tau) / 2.; + } + } + + tau += eta; + eta /= *sigma + sqrt(*sigma * *sigma + eta); + + prew = w; + + *sigma += eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L170: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L180: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L190: */ + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + + if (w <= 0.) { + sg2lb = max(sg2lb,tau); + } else { + sg2ub = min(sg2ub,tau); + } + + swtch = FALSE_; + if (orgati) { + if (-w > abs(prew) / 10.) { + swtch = TRUE_; + } + } else { + if (w > abs(prew) / 10.) { + swtch = TRUE_; + } + } + +/* Main loop to update the values of the array DELTA and WORK */ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + +/* Test for convergence */ + + if (abs(w) <= eps * erretm) { + goto L240; + } + +/* Calculate the new step */ + + if (! swtch3) { + dtipsq = work[ip1] * delta[ip1]; + dtisq = work[*i__] * delta[*i__]; + if (! 
swtch) { + if (orgati) { +/* Computing 2nd power */ + d__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + } else { +/* Computing 2nd power */ + d__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (d__1 * d__1); + } + } else { + temp = z__[ii] / (work[ii] * delta[ii]); + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - dtisq * dpsi - dtipsq * dphi; + } + a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; + b = dtipsq * dtisq * w; + if (c__ == 0.) { + if (a == 0.) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + dtisq * dtisq * ( + dpsi + dphi); + } + } else { + a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi; + } + } + eta = b / a; + } else if (a <= 0.) { + eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) + / (c__ * 2.); + } else { + eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, + abs(d__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + dtiim = work[iim1] * delta[iim1]; + dtiip = work[iip1] * delta[iip1]; + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - dtiim * dpsi - dtiip * dphi; + zz[0] = dtiim * dtiim * dpsi; + zz[2] = dtiip * dtiip * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / dtiim; + temp1 *= temp1; + temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiip * (dpsi + dphi) - temp2; + zz[0] = z__[iim1] * z__[iim1]; + if (dpsi < temp1) { + zz[2] = dtiip * dtiip * dphi; + } else { + zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi); + } + } else { + temp1 = z__[iip1] / dtiip; + temp1 *= temp1; + temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[ + iip1]) * temp1; + c__ = temp - dtiim * (dpsi + dphi) - temp2; + if (dphi < temp1) { + zz[0] = dtiim * dtiim * dpsi; + } else { + zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1)); + } + zz[2] = z__[iip1] * z__[iip1]; + } + } + dd[0] = dtiim; + dd[1] = delta[ii] * 
work[ii]; + dd[2] = dtiip; + dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + if (*info != 0) { + goto L240; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.) { + eta = -w / dw; + } + if (orgati) { + temp1 = work[*i__] * delta[*i__]; + temp = eta - temp1; + } else { + temp1 = work[ip1] * delta[ip1]; + temp = eta - temp1; + } + if (temp > sg2ub || temp < sg2lb) { + if (w < 0.) { + eta = (sg2ub - tau) / 2.; + } else { + eta = (sg2lb - tau) / 2.; + } + } + + tau += eta; + eta /= *sigma + sqrt(*sigma * *sigma + eta); + + *sigma += eta; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + work[j] += eta; + delta[j] -= eta; +/* L200: */ + } + + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.; + psi = 0.; + erretm = 0.; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / (work[j] * delta[j]); + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L210: */ + } + erretm = abs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.; + phi = 0.; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / (work[j] * delta[j]); + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L220: */ + } + + temp = z__[ii] / (work[ii] * delta[ii]); + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + + abs(tau) * dw; + if (w * prew > 0. && abs(w) > abs(prew) / 10.) { + swtch = ! swtch; + } + + if (w <= 0.) 
{ + sg2lb = max(sg2lb,tau); + } else { + sg2ub = min(sg2ub,tau); + } + +/* L230: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + + } + +L240: + return 0; + +/* End of DLASD4 */ + +} /* dlasd4_ */ + +/* Subroutine */ int dlasd5_(integer *i__, doublereal *d__, doublereal *z__, + doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * + work) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal b, c__, w, del, tau, delsq; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + This subroutine computes the square root of the I-th eigenvalue + of a positive symmetric rank-one modification of a 2-by-2 diagonal + matrix + + diag( D ) * diag( D ) + RHO * Z * transpose(Z) . + + The diagonal entries in the array D are assumed to satisfy + + 0 <= D(i) < D(j) for i < j . + + We also assume RHO > 0 and that the Euclidean norm of the vector + Z is one. + + Arguments + ========= + + I (input) INTEGER + The index of the eigenvalue to be computed. I = 1 or I = 2. + + D (input) DOUBLE PRECISION array, dimension ( 2 ) + The original eigenvalues. We assume 0 <= D(1) < D(2). + + Z (input) DOUBLE PRECISION array, dimension ( 2 ) + The components of the updating vector. + + DELTA (output) DOUBLE PRECISION array, dimension ( 2 ) + Contains (D(j) - lambda_I) in its j-th component. + The vector DELTA contains the information necessary + to construct the eigenvectors. + + RHO (input) DOUBLE PRECISION + The scalar in the symmetric updating formula. + + DSIGMA (output) DOUBLE PRECISION + The computed lambda_I, the I-th updated eigenvalue. + + WORK (workspace) DOUBLE PRECISION array, dimension ( 2 ) + WORK contains (D(j) + sigma_I) in its j-th component. 
+ + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --work; + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + delsq = del * (d__[2] + d__[1]); + if (*i__ == 1) { + w = *rho * 4. * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.) - z__[1] * + z__[1] / (d__[1] * 3. + d__[2])) / del + 1.; + if (w > 0.) { + b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * delsq; + +/* + B > ZERO, always + + The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) +*/ + + tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); + +/* The following TAU is DSIGMA - D( 1 ) */ + + tau /= d__[1] + sqrt(d__[1] * d__[1] + tau); + *dsigma = d__[1] + tau; + delta[1] = -tau; + delta[2] = del - tau; + work[1] = d__[1] * 2. + tau; + work[2] = d__[1] + tau + d__[2]; +/* + DELTA( 1 ) = -Z( 1 ) / TAU + DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) +*/ + } else { + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.) { + tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.)); + } else { + tau = (b - sqrt(b * b + c__ * 4.)) / 2.; + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt((d__1 = d__[2] * d__[2] + tau, abs(d__1))); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2. 
+ tau; +/* + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU +*/ + } +/* + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP +*/ + } else { + +/* Now I=2 */ + + b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * delsq; + +/* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ + + if (b > 0.) { + tau = (b + sqrt(b * b + c__ * 4.)) / 2.; + } else { + tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); + } + +/* The following TAU is DSIGMA - D( 2 ) */ + + tau /= d__[2] + sqrt(d__[2] * d__[2] + tau); + *dsigma = d__[2] + tau; + delta[1] = -(del + tau); + delta[2] = -tau; + work[1] = d__[1] + tau + d__[2]; + work[2] = d__[2] * 2. + tau; +/* + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP +*/ + } + return 0; + +/* End of DLASD5 */ + +} /* dlasd5_ */ + +/* Subroutine */ int dlasd6_(integer *icompq, integer *nl, integer *nr, + integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, + doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * + difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, + doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dlasd7_(integer *, integer *, integer *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, 
doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), dlasd8_( + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlamrg_(integer *, integer *, + doublereal *, integer *, integer *, integer *); + static integer isigma; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal orgnrm; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASD6 computes the SVD of an updated upper bidiagonal matrix B + obtained by merging two smaller ones by appending a row. This + routine is used only for the problem which requires all singular + values and optionally singular vector matrices in factored form. + B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. + A related subroutine, DLASD1, handles the case in which all singular + values and singular vectors of the bidiagonal matrix are desired. + + DLASD6 computes the SVD as follows: + + ( D1(in) 0 0 0 ) + B = U(in) * ( Z1' a Z2' b ) * VT(in) + ( 0 0 D2(in) 0 ) + + = U(out) * ( D(out) 0) * VT(out) + + where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M + with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros + elsewhere; and the entry b is empty if SQRE = 0. + + The singular values of B can be computed using D1, D2, the first + components of all the right singular vectors of the lower block, and + the last components of all the right singular vectors of the upper + block. 
These components are stored and updated in VF and VL, + respectively, in DLASD6. Hence U and VT are not explicitly + referenced. + + The singular values are stored in D. The algorithm consists of two + stages: + + The first stage consists of deflating the size of the problem + when there are multiple singular values or if there is a zero + in the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine DLASD7. + + The second stage consists of calculating the updated + singular values. This is done by finding the roots of the + secular equation via the routine DLASD4 (as called by DLASD8). + This routine also updates VF and VL and computes the distances + between the updated singular values and the old singular + values. + + DLASD6 is called from DLASDA. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form: + = 0: Compute singular values only. + = 1: Compute singular vectors in factored form as well. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has row dimension N = NL + NR + 1, + and column dimension M = N + SQRE. + + D (input/output) DOUBLE PRECISION array, dimension ( NL+NR+1 ). + On entry D(1:NL,1:NL) contains the singular values of the + upper block, and D(NL+2:N) contains the singular values + of the lower block. On exit D(1:N) contains the singular + values of the modified matrix. + + VF (input/output) DOUBLE PRECISION array, dimension ( M ) + On entry, VF(1:NL+1) contains the first components of all + right singular vectors of the upper block; and VF(NL+2:M) + contains the first components of all right singular vectors + of the lower block. 
On exit, VF contains the first components + of all right singular vectors of the bidiagonal matrix. + + VL (input/output) DOUBLE PRECISION array, dimension ( M ) + On entry, VL(1:NL+1) contains the last components of all + right singular vectors of the upper block; and VL(NL+2:M) + contains the last components of all right singular vectors of + the lower block. On exit, VL contains the last components of + all right singular vectors of the bidiagonal matrix. + + ALPHA (input) DOUBLE PRECISION + Contains the diagonal element associated with the added row. + + BETA (input) DOUBLE PRECISION + Contains the off-diagonal element associated with the added + row. + + IDXQ (output) INTEGER array, dimension ( N ) + This contains the permutation which will reintegrate the + subproblem just solved back into sorted order, i.e. + D( IDXQ( I = 1, N ) ) will be in ascending order. + + PERM (output) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) to be applied + to each block. Not referenced if ICOMPQ = 0. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. Not referenced if ICOMPQ = 0. + + GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. Not referenced if ICOMPQ = 0. + + LDGCOL (input) INTEGER + leading dimension of GIVCOL, must be at least N. + + GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value to be used in the + corresponding Givens rotation. Not referenced if ICOMPQ = 0. + + LDGNUM (input) INTEGER + The leading dimension of GIVNUM and POLES, must be at least N. + + POLES (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + On exit, POLES(1,*) is an array containing the new singular + values obtained from solving the secular equation, and + POLES(2,*) is an array containing the poles in the secular + equation. Not referenced if ICOMPQ = 0. 
+ + DIFL (output) DOUBLE PRECISION array, dimension ( N ) + On exit, DIFL(I) is the distance between I-th updated + (undeflated) singular value and the I-th (undeflated) old + singular value. + + DIFR (output) DOUBLE PRECISION array, + dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + On exit, DIFR(I, 1) is the distance between I-th updated + (undeflated) singular value and the I+1-th (undeflated) old + singular value. + + If ICOMPQ = 1, DIFR(1:K,2) is an array containing the + normalizing factors for the right singular vector matrix. + + See DLASD8 for details on DIFL and DIFR. + + Z (output) DOUBLE PRECISION array, dimension ( M ) + The first elements of this array contain the components + of the deflation-adjusted updating row vector. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, + This is the order of the related secular equation. 1 <= K <=N. + + C (output) DOUBLE PRECISION + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (output) DOUBLE PRECISION + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + WORK (workspace) DOUBLE PRECISION array, dimension ( 4 * M ) + + IWORK (workspace) INTEGER array, dimension ( 3 * N ) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + --vf; + --vl; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + poles_dim1 = *ldgnum; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + --difl; + --difr; + --z__; + --work; + --iwork; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -14; + } else if (*ldgnum < n) { + *info = -16; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD6", &i__1); + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in DLASD7 and DLASD8. +*/ + + isigma = 1; + iw = isigma + n; + ivfw = iw + m; + ivlw = ivfw + m; + + idx = 1; + idxc = idx + n; + idxp = idxc + n; + +/* + Scale. + + Computing MAX +*/ + d__1 = abs(*alpha), d__2 = abs(*beta); + orgnrm = max(d__1,d__2); + d__[*nl + 1] = 0.; + i__1 = n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { + orgnrm = (d__1 = d__[i__], abs(d__1)); + } +/* L10: */ + } + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, &n, &c__1, &d__[1], &n, + info); + *alpha /= orgnrm; + *beta /= orgnrm; + +/* Sort and Deflate singular values. */ + + dlasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & + work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], & + iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[ + givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, + info); + +/* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. 
*/ + + dlasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], + ldgnum, &work[isigma], &work[iw], info); + +/* Save the poles if ICOMPQ = 1. */ + + if (*icompq == 1) { + dcopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); + dcopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1); + } + +/* Unscale. */ + + dlascl_("G", &c__0, &c__0, &c_b2453, &orgnrm, &n, &c__1, &d__[1], &n, + info); + +/* Prepare the IDXQ sorting permutation. */ + + n1 = *k; + n2 = n - *k; + dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + + return 0; + +/* End of DLASD6 */ + +} /* dlasd6_ */ + +/* Subroutine */ int dlasd7_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *k, doublereal *d__, doublereal *z__, + doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, + doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * + dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, + integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, + integer *ldgnum, doublereal *c__, doublereal *s, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1; + doublereal d__1, d__2; + + /* Local variables */ + static integer i__, j, m, n, k2; + static doublereal z1; + static integer jp; + static doublereal eps, tau, tol; + static integer nlp1, nlp2, idxi, idxj; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static integer idxjp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer jprev; + + extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, + integer *, integer *, integer *), xerbla_(char *, integer *); + static doublereal hlftol; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. 
of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASD7 merges the two sets of singular values together into a single + sorted set. Then it tries to deflate the size of the problem. There + are two ways in which deflation can occur: when two or more singular + values are close together or if there is a tiny entry in the Z + vector. For each such occurrence the order of the related + secular equation problem is reduced by one. + + DLASD7 is called from DLASD6. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed + in compact form, as follows: + = 0: Compute singular values only. + = 1: Compute singular vectors of upper + bidiagonal matrix in compact form. + + NL (input) INTEGER + The row dimension of the upper block. NL >= 1. + + NR (input) INTEGER + The row dimension of the lower block. NR >= 1. + + SQRE (input) INTEGER + = 0: the lower block is an NR-by-NR square matrix. + = 1: the lower block is an NR-by-(NR+1) rectangular matrix. + + The bidiagonal matrix has + N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + K (output) INTEGER + Contains the dimension of the non-deflated matrix, this is + the order of the related secular equation. 1 <= K <=N. + + D (input/output) DOUBLE PRECISION array, dimension ( N ) + On entry D contains the singular values of the two submatrices + to be combined. On exit D contains the trailing (N-K) updated + singular values (those which were deflated) sorted into + increasing order. + + Z (output) DOUBLE PRECISION array, dimension ( M ) + On exit Z contains the updating row vector in the secular + equation. + + ZW (workspace) DOUBLE PRECISION array, dimension ( M ) + Workspace for Z. 
+ + VF (input/output) DOUBLE PRECISION array, dimension ( M ) + On entry, VF(1:NL+1) contains the first components of all + right singular vectors of the upper block; and VF(NL+2:M) + contains the first components of all right singular vectors + of the lower block. On exit, VF contains the first components + of all right singular vectors of the bidiagonal matrix. + + VFW (workspace) DOUBLE PRECISION array, dimension ( M ) + Workspace for VF. + + VL (input/output) DOUBLE PRECISION array, dimension ( M ) + On entry, VL(1:NL+1) contains the last components of all + right singular vectors of the upper block; and VL(NL+2:M) + contains the last components of all right singular vectors + of the lower block. On exit, VL contains the last components + of all right singular vectors of the bidiagonal matrix. + + VLW (workspace) DOUBLE PRECISION array, dimension ( M ) + Workspace for VL. + + ALPHA (input) DOUBLE PRECISION + Contains the diagonal element associated with the added row. + + BETA (input) DOUBLE PRECISION + Contains the off-diagonal element associated with the added + row. + + DSIGMA (output) DOUBLE PRECISION array, dimension ( N ) + Contains a copy of the diagonal elements (K-1 singular values + and one zero) in the secular equation. + + IDX (workspace) INTEGER array, dimension ( N ) + This will contain the permutation used to sort the contents of + D into ascending order. + + IDXP (workspace) INTEGER array, dimension ( N ) + This will contain the permutation used to place deflated + values of D at the end of the array. On output IDXP(2:K) + points to the nondeflated D-values and IDXP(K+1:N) + points to the deflated singular values. + + IDXQ (input) INTEGER array, dimension ( N ) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that entries in + the first half of this permutation must first be moved one + position backward; and entries in the second half + must first have NL+1 added to their values. 
+ + PERM (output) INTEGER array, dimension ( N ) + The permutations (from deflation and sorting) to be applied + to each singular block. Not referenced if ICOMPQ = 0. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. Not referenced if ICOMPQ = 0. + + GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. Not referenced if ICOMPQ = 0. + + LDGCOL (input) INTEGER + The leading dimension of GIVCOL, must be at least N. + + GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + Each number indicates the C or S value to be used in the + corresponding Givens rotation. Not referenced if ICOMPQ = 0. + + LDGNUM (input) INTEGER + The leading dimension of GIVNUM, must be at least N. + + C (output) DOUBLE PRECISION + C contains garbage if SQRE =0 and the C-value of a Givens + rotation related to the right null space if SQRE = 1. + + S (output) DOUBLE PRECISION + S contains garbage if SQRE =0 and the S-value of a Givens + rotation related to the right null space if SQRE = 1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + --z__; + --zw; + --vf; + --vfw; + --vl; + --vlw; + --dsigma; + --idx; + --idxp; + --idxq; + --perm; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + givnum_dim1 = *ldgnum; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + + /* Function Body */ + *info = 0; + n = *nl + *nr + 1; + m = n + *sqre; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*nl < 1) { + *info = -2; + } else if (*nr < 1) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldgcol < n) { + *info = -22; + } else if (*ldgnum < n) { + *info = -24; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD7", &i__1); + return 0; + } + + nlp1 = *nl + 1; + nlp2 = *nl + 2; + if (*icompq == 1) { + *givptr = 0; + } + +/* + Generate the first part of the vector Z and move the singular + values in the first part of D one position backward. +*/ + + z1 = *alpha * vl[nlp1]; + vl[nlp1] = 0.; + tau = vf[nlp1]; + for (i__ = *nl; i__ >= 1; --i__) { + z__[i__ + 1] = *alpha * vl[i__]; + vl[i__] = 0.; + vf[i__ + 1] = vf[i__]; + d__[i__ + 1] = d__[i__]; + idxq[i__ + 1] = idxq[i__] + 1; +/* L10: */ + } + vf[1] = tau; + +/* Generate the second part of the vector Z. */ + + i__1 = m; + for (i__ = nlp2; i__ <= i__1; ++i__) { + z__[i__] = *beta * vf[i__]; + vf[i__] = 0.; +/* L20: */ + } + +/* Sort the singular values into increasing order */ + + i__1 = n; + for (i__ = nlp2; i__ <= i__1; ++i__) { + idxq[i__] += nlp1; +/* L30: */ + } + +/* DSIGMA, IDXC, IDXC, and ZW are used as storage space. 
*/ + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + dsigma[i__] = d__[idxq[i__]]; + zw[i__] = z__[idxq[i__]]; + vfw[i__] = vf[idxq[i__]]; + vlw[i__] = vl[idxq[i__]]; +/* L40: */ + } + + dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + + i__1 = n; + for (i__ = 2; i__ <= i__1; ++i__) { + idxi = idx[i__] + 1; + d__[i__] = dsigma[idxi]; + z__[i__] = zw[idxi]; + vf[i__] = vfw[idxi]; + vl[i__] = vlw[idxi]; +/* L50: */ + } + +/* Calculate the allowable deflation tolerence */ + + eps = EPSILON; +/* Computing MAX */ + d__1 = abs(*alpha), d__2 = abs(*beta); + tol = max(d__1,d__2); +/* Computing MAX */ + d__2 = (d__1 = d__[n], abs(d__1)); + tol = eps * 64. * max(d__2,tol); + +/* + There are 2 kinds of deflation -- first a value in the z-vector + is small, second two (or more) singular values are very close + together (their difference is small). + + If the value in the z-vector is small, we simply permute the + array so that the corresponding singular value is moved to the + end. + + If two values in the D-vector are close, we perform a two-sided + rotation designed to make one of the corresponding z-vector + entries zero, and then permute the array so that the deflated + singular value is moved to the end. + + If there are multiple singular values then the problem deflates. + Here the number of equal singular values are found. As each equal + singular value is found, an elementary reflector is computed to + rotate the corresponding singular subspace so that the + corresponding components of Z are zero in this new basis. +*/ + + *k = 1; + k2 = n + 1; + i__1 = n; + for (j = 2; j <= i__1; ++j) { + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + idxp[k2] = j; + if (j == n) { + goto L100; + } + } else { + jprev = j; + goto L70; + } +/* L60: */ + } +L70: + j = jprev; +L80: + ++j; + if (j > n) { + goto L90; + } + if ((d__1 = z__[j], abs(d__1)) <= tol) { + +/* Deflate due to small z component. 
*/ + + --k2; + idxp[k2] = j; + } else { + +/* Check if singular values are close enough to allow deflation. */ + + if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + +/* Deflation is possible. */ + + *s = z__[jprev]; + *c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = dlapy2_(c__, s); + z__[j] = tau; + z__[jprev] = 0.; + *c__ /= tau; + *s = -(*s) / tau; + +/* Record the appropriate Givens rotation */ + + if (*icompq == 1) { + ++(*givptr); + idxjp = idxq[idx[jprev] + 1]; + idxj = idxq[idx[j] + 1]; + if (idxjp <= nlp1) { + --idxjp; + } + if (idxj <= nlp1) { + --idxj; + } + givcol[*givptr + (givcol_dim1 << 1)] = idxjp; + givcol[*givptr + givcol_dim1] = idxj; + givnum[*givptr + (givnum_dim1 << 1)] = *c__; + givnum[*givptr + givnum_dim1] = *s; + } + drot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); + drot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); + --k2; + idxp[k2] = jprev; + jprev = j; + } else { + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + jprev = j; + } + } + goto L80; +L90: + +/* Record the last singular value. */ + + ++(*k); + zw[*k] = z__[jprev]; + dsigma[*k] = d__[jprev]; + idxp[*k] = jprev; + +L100: + +/* + Sort the singular values into DSIGMA. The singular values which + were not deflated go into the first K slots of DSIGMA, except + that DSIGMA(1) is treated separately. +*/ + + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + dsigma[j] = d__[jp]; + vfw[j] = vf[jp]; + vlw[j] = vl[jp]; +/* L110: */ + } + if (*icompq == 1) { + i__1 = n; + for (j = 2; j <= i__1; ++j) { + jp = idxp[j]; + perm[j] = idxq[idx[jp] + 1]; + if (perm[j] <= nlp1) { + --perm[j]; + } +/* L120: */ + } + } + +/* + The deflated singular values go back into the last N - K slots of + D. +*/ + + i__1 = n - *k; + dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* + Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and + VL(M). 
+*/ + + dsigma[1] = 0.; + hlftol = tol / 2.; + if (abs(dsigma[2]) <= hlftol) { + dsigma[2] = hlftol; + } + if (m > n) { + z__[1] = dlapy2_(&z1, &z__[m]); + if (z__[1] <= tol) { + *c__ = 1.; + *s = 0.; + z__[1] = tol; + } else { + *c__ = z1 / z__[1]; + *s = -z__[m] / z__[1]; + } + drot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); + drot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); + } else { + if (abs(z1) <= tol) { + z__[1] = tol; + } else { + z__[1] = z1; + } + } + +/* Restore Z, VF, and VL. */ + + i__1 = *k - 1; + dcopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); + i__1 = n - 1; + dcopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); + i__1 = n - 1; + dcopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); + + return 0; + +/* End of DLASD7 */ + +} /* dlasd7_ */ + +/* Subroutine */ int dlasd8_(integer *icompq, integer *k, doublereal *d__, + doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, + doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * + work, integer *info) +{ + /* System generated locals */ + integer difr_dim1, difr_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static integer i__, j; + static doublereal dj, rho; + static integer iwk1, iwk2, iwk3; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal temp; + extern doublereal dnrm2_(integer *, doublereal *, integer *); + static integer iwk2i, iwk3i; + static doublereal diflj, difrj, dsigj; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern doublereal dlamc3_(doublereal *, doublereal *); + extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *), dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, 
integer *), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *), + xerbla_(char *, integer *); + static doublereal dsigjp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASD8 finds the square roots of the roots of the secular equation, + as defined by the values in DSIGMA and Z. It makes the appropriate + calls to DLASD4, and stores, for each element in D, the distance + to its two nearest poles (elements in DSIGMA). It also updates + the arrays VF and VL, the first and last components of all the + right singular vectors of the original bidiagonal matrix. + + DLASD8 is called from DLASD6. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed in + factored form in the calling routine: + = 0: Compute singular values only. + = 1: Compute singular vectors in factored form as well. + + K (input) INTEGER + The number of terms in the rational function to be solved + by DLASD4. K >= 1. + + D (output) DOUBLE PRECISION array, dimension ( K ) + On output, D contains the updated singular values. + + Z (input) DOUBLE PRECISION array, dimension ( K ) + The first K elements of this array contain the components + of the deflation-adjusted updating row vector. + + VF (input/output) DOUBLE PRECISION array, dimension ( K ) + On entry, VF contains information passed through DBEDE8. + On exit, VF contains the first K components of the first + components of all right singular vectors of the bidiagonal + matrix. + + VL (input/output) DOUBLE PRECISION array, dimension ( K ) + On entry, VL contains information passed through DBEDE8. + On exit, VL contains the first K components of the last + components of all right singular vectors of the bidiagonal + matrix. 
+ + DIFL (output) DOUBLE PRECISION array, dimension ( K ) + On exit, DIFL(I) = D(I) - DSIGMA(I). + + DIFR (output) DOUBLE PRECISION array, + dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and + dimension ( K ) if ICOMPQ = 0. + On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not + defined and will not be referenced. + + If ICOMPQ = 1, DIFR(1:K,2) is an array containing the + normalizing factors for the right singular vector matrix. + + LDDIFR (input) INTEGER + The leading dimension of DIFR, must be at least K. + + DSIGMA (input) DOUBLE PRECISION array, dimension ( K ) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. + + WORK (workspace) DOUBLE PRECISION array, dimension at least 3 * K + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --z__; + --vf; + --vl; + --difl; + difr_dim1 = *lddifr; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + --dsigma; + --work; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*k < 1) { + *info = -2; + } else if (*lddifr < *k) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASD8", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 1) { + d__[1] = abs(z__[1]); + difl[1] = d__[1]; + if (*icompq == 1) { + difl[2] = 1.; + difr[(difr_dim1 << 1) + 1] = 1.; + } + return 0; + } + +/* + Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can + be computed with high relative accuracy (barring over/underflow). 
+ This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), + which on any of these machines zeros out the bottommost + bit of DSIGMA(I) if it is 1; this makes the subsequent + subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DSIGMA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DSIGMA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; +/* L10: */ + } + +/* Book keeping. */ + + iwk1 = 1; + iwk2 = iwk1 + *k; + iwk3 = iwk2 + *k; + iwk2i = iwk2 - 1; + iwk3i = iwk3 - 1; + +/* Normalize Z. */ + + rho = dnrm2_(k, &z__[1], &c__1); + dlascl_("G", &c__0, &c__0, &rho, &c_b2453, k, &c__1, &z__[1], k, info); + rho *= rho; + +/* Initialize WORK(IWK3). */ + + dlaset_("A", k, &c__1, &c_b2453, &c_b2453, &work[iwk3], k); + +/* + Compute the updated singular values, the arrays DIFL, DIFR, + and the updated Z. +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ + iwk2], info); + +/* If the root finder fails, the computation is terminated. 
*/ + + if (*info != 0) { + return 0; + } + work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j]; + difl[j] = -work[j]; + difr[j + difr_dim1] = -work[j + 1]; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L20: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ + j]); +/* L30: */ + } +/* L40: */ + } + +/* Compute updated Z. */ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + d__2 = sqrt((d__1 = work[iwk3i + i__], abs(d__1))); + z__[i__] = d_sign(&d__2, &z__[i__]); +/* L50: */ + } + +/* Update VF and VL. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + diflj = difl[j]; + dj = d__[j]; + dsigj = -dsigma[j]; + if (j < *k) { + difrj = -difr[j + difr_dim1]; + dsigjp = -dsigma[j + 1]; + } + work[j] = -z__[j] / diflj / (dsigma[j] + dj); + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigj) - diflj) / ( + dsigma[i__] + dj); +/* L60: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigjp) + difrj) / + (dsigma[i__] + dj); +/* L70: */ + } + temp = dnrm2_(k, &work[1], &c__1); + work[iwk2i + j] = ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; + work[iwk3i + j] = ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; + if (*icompq == 1) { + difr[j + (difr_dim1 << 1)] = temp; + } +/* L80: */ + } + + dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); + dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); + + return 0; + +/* End of DLASD8 */ + +} /* dlasd8_ */ + +/* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n, + integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer + *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, + 
doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, + integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, + doublereal *s, doublereal *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, + difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, + poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset, + z_dim1, z_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, + nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; + static doublereal beta; + static integer idxq, nlvl; + static doublereal alpha; + static integer inode, ndiml, ndimr, idxqi, itemp; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer sqrei; + extern /* Subroutine */ int dlasd6_(integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *); + static integer nwork1, nwork2; + extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer + *, integer *, integer *, doublereal *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *), dlasdt_(integer *, integer *, + integer *, integer *, integer *, integer *, integer *), dlaset_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *); + static integer smlszp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + Using a divide and conquer approach, DLASDA computes the singular + value decomposition (SVD) of a real upper bidiagonal N-by-M matrix + B with diagonal D and offdiagonal E, where M = N + SQRE. The + algorithm computes the singular values in the SVD B = U * S * VT. + The orthogonal matrices U and VT are optionally computed in + compact form. + + A related subroutine, DLASD0, computes the singular values and + the singular vectors in explicit form. + + Arguments + ========= + + ICOMPQ (input) INTEGER + Specifies whether singular vectors are to be computed + in compact form, as follows + = 0: Compute singular values only. + = 1: Compute singular vectors of upper bidiagonal + matrix in compact form. + + SMLSIZ (input) INTEGER + The maximum size of the subproblems at the bottom of the + computation tree. + + N (input) INTEGER + The row dimension of the upper bidiagonal matrix. This is + also the dimension of the main diagonal array D. + + SQRE (input) INTEGER + Specifies the column dimension of the bidiagonal matrix. + = 0: The bidiagonal matrix has column dimension M = N; + = 1: The bidiagonal matrix has column dimension M = N + 1. + + D (input/output) DOUBLE PRECISION array, dimension ( N ) + On entry D contains the main diagonal of the bidiagonal + matrix. On exit D, if INFO = 0, contains its singular values. + + E (input) DOUBLE PRECISION array, dimension ( M-1 ) + Contains the subdiagonal entries of the bidiagonal matrix. + On exit, E has been destroyed. + + U (output) DOUBLE PRECISION array, + dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left + singular vector matrices of all subproblems at the bottom + level. + + LDU (input) INTEGER, LDU = > N. + The leading dimension of arrays U, VT, DIFL, DIFR, POLES, + GIVNUM, and Z. 
+ + VT (output) DOUBLE PRECISION array, + dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right + singular vector matrices of all subproblems at the bottom + level. + + K (output) INTEGER array, + dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. + If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th + secular equation on the computation tree. + + DIFL (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), + where NLVL = floor(log_2 (N/SMLSIZ))). + + DIFR (output) DOUBLE PRECISION array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) + record distances between singular values on the I-th + level and singular values on the (I -1)-th level, and + DIFR(1:N, 2 * I ) contains the normalizing factors for + the right singular vector matrix. See DLASD8 for details. + + Z (output) DOUBLE PRECISION array, + dimension ( LDU, NLVL ) if ICOMPQ = 1 and + dimension ( N ) if ICOMPQ = 0. + The first K elements of Z(1, I) contain the components of + the deflation-adjusted updating row vector for subproblems + on the I-th level. + + POLES (output) DOUBLE PRECISION array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and + POLES(1, 2*I) contain the new and old singular values + involved in the secular equations on the I-th level. + + GIVPTR (output) INTEGER array, + dimension ( N ) if ICOMPQ = 1, and not referenced if + ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records + the number of Givens rotations performed on the I-th + problem on the computation tree. + + GIVCOL (output) INTEGER array, + dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not + referenced if ICOMPQ = 0. 
If ICOMPQ = 1, on exit, for each I, + GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations + of Givens rotations performed on the I-th level on the + computation tree. + + LDGCOL (input) INTEGER, LDGCOL = > N. + The leading dimension of arrays GIVCOL and PERM. + + PERM (output) INTEGER array, + dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced + if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records + permutations done on the I-th level of the computation tree. + + GIVNUM (output) DOUBLE PRECISION array, + dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not + referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, + GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- + values of Givens rotations performed on the I-th level on + the computation tree. + + C (output) DOUBLE PRECISION array, + dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. + If ICOMPQ = 1 and the I-th subproblem is not square, on exit, + C( I ) contains the C-value of a Givens rotation related to + the right null space of the I-th subproblem. + + S (output) DOUBLE PRECISION array, dimension ( N ) if + ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 + and the I-th subproblem is not square, on exit, S( I ) + contains the S-value of a Givens rotation related to + the right null space of the I-th subproblem. + + WORK (workspace) DOUBLE PRECISION array, dimension + (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). + + IWORK (workspace) INTEGER array. + Dimension must be at least (7 * N). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an singular value did not converge + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + --e; + givnum_dim1 = *ldu; + givnum_offset = 1 + givnum_dim1; + givnum -= givnum_offset; + poles_dim1 = *ldu; + poles_offset = 1 + poles_dim1; + poles -= poles_offset; + z_dim1 = *ldu; + z_offset = 1 + z_dim1; + z__ -= z_offset; + difr_dim1 = *ldu; + difr_offset = 1 + difr_dim1; + difr -= difr_offset; + difl_dim1 = *ldu; + difl_offset = 1 + difl_dim1; + difl -= difl_offset; + vt_dim1 = *ldu; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + --k; + --givptr; + perm_dim1 = *ldgcol; + perm_offset = 1 + perm_dim1; + perm -= perm_offset; + givcol_dim1 = *ldgcol; + givcol_offset = 1 + givcol_dim1; + givcol -= givcol_offset; + --c__; + --s; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*smlsiz < 3) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*sqre < 0 || *sqre > 1) { + *info = -4; + } else if (*ldu < *n + *sqre) { + *info = -8; + } else if (*ldgcol < *n) { + *info = -17; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASDA", &i__1); + return 0; + } + + m = *n + *sqre; + +/* If the input matrix is too small, call DLASDQ to find the SVD. */ + + if (*n <= *smlsiz) { + if (*icompq == 0) { + dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & + work[1], info); + } else { + dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], + info); + } + return 0; + } + +/* Book-keeping and set up the computation tree. 
*/ + + inode = 1; + ndiml = inode + *n; + ndimr = ndiml + *n; + idxq = ndimr + *n; + iwk = idxq + *n; + + ncc = 0; + nru = 0; + + smlszp = *smlsiz + 1; + vf = 1; + vl = vf + m; + nwork1 = vl + m; + nwork2 = nwork1 + smlszp * smlszp; + + dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + smlsiz); + +/* + for the nodes on bottom level of the tree, solve + their subproblems by DLASDQ. +*/ + + ndb1 = (nd + 1) / 2; + i__1 = nd; + for (i__ = ndb1; i__ <= i__1; ++i__) { + +/* + IC : center row of each node + NL : number of rows of left subproblem + NR : number of rows of right subproblem + NLF: starting row of the left subproblem + NRF: starting row of the right subproblem +*/ + + i1 = i__ - 1; + ic = iwork[inode + i1]; + nl = iwork[ndiml + i1]; + nlp1 = nl + 1; + nr = iwork[ndimr + i1]; + nlf = ic - nl; + nrf = ic + 1; + idxqi = idxq + nlf - 2; + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + sqrei = 1; + if (*icompq == 0) { + dlaset_("A", &nlp1, &nlp1, &c_b2467, &c_b2453, &work[nwork1], & + smlszp); + dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & + work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], + &nl, &work[nwork2], info); + itemp = nwork1 + nl * smlszp; + dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); + dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + dlaset_("A", &nl, &nl, &c_b2467, &c_b2453, &u[nlf + u_dim1], ldu); + dlaset_("A", &nlp1, &nlp1, &c_b2467, &c_b2453, &vt[nlf + vt_dim1], + ldu); + dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & + vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf + + u_dim1], ldu, &work[nwork1], info); + dcopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); + dcopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nl; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L10: */ + } + if (i__ == nd && *sqre == 0) { + sqrei = 0; + } else { + sqrei = 1; + } + idxqi += 
nlp1; + vfi += nlp1; + vli += nlp1; + nrp1 = nr + sqrei; + if (*icompq == 0) { + dlaset_("A", &nrp1, &nrp1, &c_b2467, &c_b2453, &work[nwork1], & + smlszp); + dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & + work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], + &nr, &work[nwork2], info); + itemp = nwork1 + (nrp1 - 1) * smlszp; + dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); + dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); + } else { + dlaset_("A", &nr, &nr, &c_b2467, &c_b2453, &u[nrf + u_dim1], ldu); + dlaset_("A", &nrp1, &nrp1, &c_b2467, &c_b2453, &vt[nrf + vt_dim1], + ldu); + dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & + vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf + + u_dim1], ldu, &work[nwork1], info); + dcopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); + dcopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) + ; + } + if (*info != 0) { + return 0; + } + i__2 = nr; + for (j = 1; j <= i__2; ++j) { + iwork[idxqi + j] = j; +/* L20: */ + } +/* L30: */ + } + +/* Now conquer each subproblem bottom-up. */ + + j = pow_ii(&c__2, &nlvl); + for (lvl = nlvl; lvl >= 1; --lvl) { + lvl2 = (lvl << 1) - 1; + +/* + Find the first node LF and last node LL on + the current level LVL. 
+*/ + + if (lvl == 1) { + lf = 1; + ll = 1; + } else { + i__1 = lvl - 1; + lf = pow_ii(&c__2, &i__1); + ll = (lf << 1) - 1; + } + i__1 = ll; + for (i__ = lf; i__ <= i__1; ++i__) { + im1 = i__ - 1; + ic = iwork[inode + im1]; + nl = iwork[ndiml + im1]; + nr = iwork[ndimr + im1]; + nlf = ic - nl; + nrf = ic + 1; + if (i__ == ll) { + sqrei = *sqre; + } else { + sqrei = 1; + } + vfi = vf + nlf - 1; + vli = vl + nlf - 1; + idxqi = idxq + nlf - 1; + alpha = d__[ic]; + beta = e[ic]; + if (*icompq == 0) { + dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[ + perm_offset], &givptr[1], &givcol[givcol_offset], + ldgcol, &givnum[givnum_offset], ldu, &poles[ + poles_offset], &difl[difl_offset], &difr[difr_offset], + &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1], + &iwork[iwk], info); + } else { + --j; + dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf + + lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 * + givcol_dim1], ldgcol, &givnum[nlf + lvl2 * + givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], & + difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 * + difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j], + &s[j], &work[nwork1], &iwork[iwk], info); + } + if (*info != 0) { + return 0; + } +/* L40: */ + } +/* L50: */ + } + + return 0; + +/* End of DLASDA */ + +} /* dlasda_ */ + +/* Subroutine */ int dlasdq_(char *uplo, integer *sqre, integer *n, integer * + ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, + doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, + doublereal *c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + + /* Local variables */ + static integer i__, j; + static doublereal r__, cs, sn; + static integer np1, isub; + static doublereal smin; + static integer sqre1; + extern logical lsame_(char *, 
char *); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer * + , doublereal *, integer *); + static integer iuplo; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), xerbla_(char *, + integer *), dbdsqr_(char *, integer *, integer *, integer + *, integer *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + static logical rotate; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASDQ computes the singular value decomposition (SVD) of a real + (upper or lower) bidiagonal matrix with diagonal D and offdiagonal + E, accumulating the transformations if desired. Letting B denote + the input bidiagonal matrix, the algorithm computes orthogonal + matrices Q and P such that B = Q * S * P' (P' denotes the transpose + of P). The singular values S are overwritten on D. + + The input matrix U is changed to U * Q if desired. + The input matrix VT is changed to P' * VT if desired. + The input matrix C is changed to Q' * C if desired. + + See "Computing Small Singular Values of Bidiagonal Matrices With + Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + LAPACK Working Note #3, for a detailed description of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + On entry, UPLO specifies whether the input bidiagonal matrix + is upper or lower bidiagonal, and wether it is square are + not. + UPLO = 'U' or 'u' B is upper bidiagonal. + UPLO = 'L' or 'l' B is lower bidiagonal. + + SQRE (input) INTEGER + = 0: then the input matrix is N-by-N. 
+ = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and + (N+1)-by-N if UPLU = 'L'. + + The bidiagonal matrix has + N = NL + NR + 1 rows and + M = N + SQRE >= N columns. + + N (input) INTEGER + On entry, N specifies the number of rows and columns + in the matrix. N must be at least 0. + + NCVT (input) INTEGER + On entry, NCVT specifies the number of columns of + the matrix VT. NCVT must be at least 0. + + NRU (input) INTEGER + On entry, NRU specifies the number of rows of + the matrix U. NRU must be at least 0. + + NCC (input) INTEGER + On entry, NCC specifies the number of columns of + the matrix C. NCC must be at least 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, D contains the diagonal entries of the + bidiagonal matrix whose SVD is desired. On normal exit, + D contains the singular values in ascending order. + + E (input/output) DOUBLE PRECISION array. + dimension is (N-1) if SQRE = 0 and N if SQRE = 1. + On entry, the entries of E contain the offdiagonal entries + of the bidiagonal matrix whose SVD is desired. On normal + exit, E will contain 0. If the algorithm does not converge, + D and E will contain the diagonal and superdiagonal entries + of a bidiagonal matrix orthogonally equivalent to the one + given as input. + + VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) + On entry, contains a matrix which on exit has been + premultiplied by P', dimension N-by-NCVT if SQRE = 0 + and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). + + LDVT (input) INTEGER + On entry, LDVT specifies the leading dimension of VT as + declared in the calling (sub) program. LDVT must be at + least 1. If NCVT is nonzero LDVT must also be at least N. + + U (input/output) DOUBLE PRECISION array, dimension (LDU, N) + On entry, contains a matrix which on exit has been + postmultiplied by Q, dimension NRU-by-N if SQRE = 0 + and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). 
+ + LDU (input) INTEGER + On entry, LDU specifies the leading dimension of U as + declared in the calling (sub) program. LDU must be at + least max( 1, NRU ) . + + C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) + On entry, contains an N-by-NCC matrix which on exit + has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 + and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). + + LDC (input) INTEGER + On entry, LDC specifies the leading dimension of C as + declared in the calling (sub) program. LDC must be at + least 1. If NCC is nonzero, LDC must also be at least N. + + WORK (workspace) DOUBLE PRECISION array, dimension (4*N) + Workspace. Only referenced if one of NCVT, NRU, or NCC is + nonzero, and if N is at least 2. + + INFO (output) INTEGER + On exit, a value of 0 indicates a successful exit. + If INFO < 0, argument number -INFO is illegal. + If INFO > 0, the algorithm did not converge, and INFO + specifies how many superdiagonals did not converge. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + iuplo = 0; + if (lsame_(uplo, "U")) { + iuplo = 1; + } + if (lsame_(uplo, "L")) { + iuplo = 2; + } + if (iuplo == 0) { + *info = -1; + } else if (*sqre < 0 || *sqre > 1) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ncvt < 0) { + *info = -4; + } else if (*nru < 0) { + *info = -5; + } else if (*ncc < 0) { + *info = -6; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -10; + } else if (*ldu < max(1,*nru)) { + *info = -12; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASDQ", &i__1); + return 0; + } + if (*n == 0) { + return 0; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + np1 = *n + 1; + sqre1 = *sqre; + +/* + If matrix non-square upper bidiagonal, rotate to be lower + bidiagonal. The rotations are on the right. +*/ + + if (iuplo == 1 && sqre1 == 1) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L10: */ + } + dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + e[*n] = 0.; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + iuplo = 2; + sqre1 = 0; + +/* Update singular vectors if desired. */ + + if (*ncvt > 0) { + dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ + vt_offset], ldvt); + } + } + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left. 
+*/ + + if (iuplo == 2) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (rotate) { + work[i__] = cs; + work[*n + i__] = sn; + } +/* L20: */ + } + +/* + If matrix (N+1)-by-N lower bidiagonal, one additional + rotation is needed. +*/ + + if (sqre1 == 1) { + dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + d__[*n] = r__; + if (rotate) { + work[*n] = cs; + work[*n + *n] = sn; + } + } + +/* Update singular vectors if desired. */ + + if (*nru > 0) { + if (sqre1 == 0) { + dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ + u_offset], ldu); + } else { + dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ + u_offset], ldu); + } + } + if (*ncc > 0) { + if (sqre1 == 0) { + dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc); + } else { + dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ + c_offset], ldc); + } + } + } + +/* + Call DBDSQR to compute the SVD of the reduced real + N-by-N upper bidiagonal matrix. +*/ + + dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ + u_offset], ldu, &c__[c_offset], ldc, &work[1], info); + +/* + Sort the singular values into ascending order (insertion sort on + singular values, but only one transposition per singular vector) +*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I). */ + + isub = i__; + smin = d__[i__]; + i__2 = *n; + for (j = i__ + 1; j <= i__2; ++j) { + if (d__[j] < smin) { + isub = j; + smin = d__[j]; + } +/* L30: */ + } + if (isub != i__) { + +/* Swap singular values and vectors. 
*/ + + d__[isub] = d__[i__]; + d__[i__] = smin; + if (*ncvt > 0) { + dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], + ldvt); + } + if (*nru > 0) { + dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] + , &c__1); + } + if (*ncc > 0) { + dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) + ; + } + } +/* L40: */ + } + + return 0; + +/* End of DLASDQ */ + +} /* dlasdq_ */ + +/* Subroutine */ int dlasdt_(integer *n, integer *lvl, integer *nd, integer * + inode, integer *ndiml, integer *ndimr, integer *msub) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Builtin functions */ + double log(doublereal); + + /* Local variables */ + static integer i__, il, ir, maxn; + static doublereal temp; + static integer nlvl, llst, ncrnt; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASDT creates a tree of subproblems for bidiagonal divide and + conquer. + + Arguments + ========= + + N (input) INTEGER + On entry, the number of diagonal elements of the + bidiagonal matrix. + + LVL (output) INTEGER + On exit, the number of levels on the computation tree. + + ND (output) INTEGER + On exit, the number of nodes on the tree. + + INODE (output) INTEGER array, dimension ( N ) + On exit, centers of subproblems. + + NDIML (output) INTEGER array, dimension ( N ) + On exit, row dimensions of left children. + + NDIMR (output) INTEGER array, dimension ( N ) + On exit, row dimensions of right children. + + MSUB (input) INTEGER. + On entry, the maximum row dimension each subproblem at the + bottom of the tree can be of. 
+ + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Find the number of levels on the tree. +*/ + + /* Parameter adjustments */ + --ndimr; + --ndiml; + --inode; + + /* Function Body */ + maxn = max(1,*n); + temp = log((doublereal) maxn / (doublereal) (*msub + 1)) / log(2.); + *lvl = (integer) temp + 1; + + i__ = *n / 2; + inode[1] = i__ + 1; + ndiml[1] = i__; + ndimr[1] = *n - i__ - 1; + il = 0; + ir = 1; + llst = 1; + i__1 = *lvl - 1; + for (nlvl = 1; nlvl <= i__1; ++nlvl) { + +/* + Constructing the tree at (NLVL+1)-st level. The number of + nodes created on this level is LLST * 2. +*/ + + i__2 = llst - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + il += 2; + ir += 2; + ncrnt = llst + i__; + ndiml[il] = ndiml[ncrnt] / 2; + ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1; + inode[il] = inode[ncrnt] - ndimr[il] - 1; + ndiml[ir] = ndimr[ncrnt] / 2; + ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1; + inode[ir] = inode[ncrnt] + ndiml[ir] + 1; +/* L10: */ + } + llst <<= 1; +/* L20: */ + } + *nd = (llst << 1) - 1; + + return 0; + +/* End of DLASDT */ + +} /* dlasdt_ */ + +/* Subroutine */ int dlaset_(char *uplo, integer *m, integer *n, doublereal * + alpha, doublereal *beta, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLASET initializes an m-by-n matrix A to BETA on the diagonal and + ALPHA on the offdiagonals. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be set. 
+ = 'U': Upper triangular part is set; the strictly lower + triangular part of A is not changed. + = 'L': Lower triangular part is set; the strictly upper + triangular part of A is not changed. + Otherwise: All of the matrix A is set. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + ALPHA (input) DOUBLE PRECISION + The constant to which the offdiagonal elements are to be set. + + BETA (input) DOUBLE PRECISION + The constant to which the diagonal elements are to be set. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On exit, the leading m-by-n submatrix of A is set as follows: + + if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, + if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, + otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, + + and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + +/* + Set the strictly upper triangular or trapezoidal part of the + array to ALPHA. +*/ + + i__1 = *n; + for (j = 2; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = j - 1; + i__2 = min(i__3,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L10: */ + } +/* L20: */ + } + + } else if (lsame_(uplo, "L")) { + +/* + Set the strictly lower triangular or trapezoidal part of the + array to ALPHA. +*/ + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L30: */ + } +/* L40: */ + } + + } else { + +/* Set the leading m-by-n submatrix to ALPHA. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = *alpha; +/* L50: */ + } +/* L60: */ + } + } + +/* Set the first min(M,N) diagonal elements to BETA. */ + + i__1 = min(*m,*n); + for (i__ = 1; i__ <= i__1; ++i__) { + a[i__ + i__ * a_dim1] = *beta; +/* L70: */ + } + + return 0; + +/* End of DLASET */ + +} /* dlaset_ */ + +/* Subroutine */ int dlasq1_(integer *n, doublereal *d__, doublereal *e, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__; + static doublereal eps; + extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + static doublereal scale; + static integer iinfo; + static doublereal sigmn; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static doublereal sigmx; + extern /* Subroutine */ int dlasq2_(integer *, doublereal *, integer *); + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + static doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *), dlasrt_( + char *, integer *, doublereal *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASQ1 computes the singular values of a real N-by-N bidiagonal + matrix with diagonal D and off-diagonal E. The singular values + are computed to high relative accuracy, in the absence of + denormalization, underflow and overflow. The algorithm was first + presented in + + "Accurate singular values and differential qd algorithms" by K. V. + Fernando and B. N. 
Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, + 1994, + + and the present implementation is described in "An implementation of + the dqds Algorithm (Positive Case)", LAPACK Working Note. + + Arguments + ========= + + N (input) INTEGER + The number of rows and columns in the matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, D contains the diagonal elements of the + bidiagonal matrix whose SVD is desired. On normal exit, + D contains the singular values in decreasing order. + + E (input/output) DOUBLE PRECISION array, dimension (N) + On entry, elements E(1:N-1) contain the off-diagonal elements + of the bidiagonal matrix whose SVD is desired. + On exit, E is overwritten. + + WORK (workspace) DOUBLE PRECISION array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm failed + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --work; + --e; + --d__; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -2; + i__1 = -(*info); + xerbla_("DLASQ1", &i__1); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + d__[1] = abs(d__[1]); + return 0; + } else if (*n == 2) { + dlas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); + d__[1] = sigmx; + d__[2] = sigmn; + return 0; + } + +/* Estimate the largest singular value. 
*/ + + sigmx = 0.; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = (d__1 = d__[i__], abs(d__1)); +/* Computing MAX */ + d__2 = sigmx, d__3 = (d__1 = e[i__], abs(d__1)); + sigmx = max(d__2,d__3); +/* L10: */ + } + d__[*n] = (d__1 = d__[*n], abs(d__1)); + +/* Early return if SIGMX is zero (matrix is already diagonal). */ + + if (sigmx == 0.) { + dlasrt_("D", n, &d__[1], &iinfo); + return 0; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__1 = sigmx, d__2 = d__[i__]; + sigmx = max(d__1,d__2); +/* L20: */ + } + +/* + Copy D and E into WORK (in the Z format) and scale (squaring the + input data makes scaling by a power of the radix pointless). +*/ + + eps = PRECISION; + safmin = SAFEMINIMUM; + scale = sqrt(eps / safmin); + dcopy_(n, &d__[1], &c__1, &work[1], &c__2); + i__1 = *n - 1; + dcopy_(&i__1, &e[1], &c__1, &work[2], &c__2); + i__1 = (*n << 1) - 1; + i__2 = (*n << 1) - 1; + dlascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, + &iinfo); + +/* Compute the q's and e's. 
*/ + + i__1 = (*n << 1) - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + d__1 = work[i__]; + work[i__] = d__1 * d__1; +/* L30: */ + } + work[*n * 2] = 0.; + + dlasq2_(n, &work[1], info); + + if (*info == 0) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = sqrt(work[i__]); +/* L40: */ + } + dlascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & + iinfo); + } + + return 0; + +/* End of DLASQ1 */ + +} /* dlasq1_ */ + +/* Subroutine */ int dlasq2_(integer *n, doublereal *z__, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal d__, e; + static integer k; + static doublereal s, t; + static integer i0, i4, n0, pp; + static doublereal eps, tol; + static integer ipn4; + static doublereal tol2; + static logical ieee; + static integer nbig; + static doublereal dmin__, emin, emax; + static integer ndiv, iter; + static doublereal qmin, temp, qmax, zmax; + static integer splt, nfail; + static doublereal desig, trace, sigma; + static integer iinfo; + extern /* Subroutine */ int dlasq3_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + integer *, integer *, integer *, logical *); + + static integer iwhila, iwhilb; + static doublereal oldemn, safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASQ2 computes all the eigenvalues of the symmetric positive + definite tridiagonal matrix associated with the qd array Z to high + relative accuracy are computed to high relative accuracy, in the + absence of denormalization, underflow and overflow. + + To see the relation of Z to the tridiagonal matrix, let L be a + unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and + let U be an upper bidiagonal matrix with 1's above and diagonal + Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the + symmetric tridiagonal to which it is similar. + + Note : DLASQ2 defines a logical variable, IEEE, which is true + on machines which follow ieee-754 floating-point standard in their + handling of infinities and NaNs, and false otherwise. This variable + is passed to DLASQ3. + + Arguments + ========= + + N (input) INTEGER + The number of rows and columns in the matrix. N >= 0. + + Z (workspace) DOUBLE PRECISION array, dimension ( 4*N ) + On entry Z holds the qd array. On exit, entries 1 to N hold + the eigenvalues in decreasing order, Z( 2*N+1 ) holds the + trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If + N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) + holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of + shifts that failed. 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if the i-th argument is a scalar and had an illegal + value, then INFO = -i, if the i-th argument is an + array and the j-entry had an illegal value, then + INFO = -(i*100+j) + > 0: the algorithm failed + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + + Further Details + =============== + Local Variables: I0:N0 defines a current unreduced segment of Z. + The shifts are accumulated in SIGMA. Iteration count is in ITER. + Ping-pong is controlled by PP (alternates between 0 and 1). + + ===================================================================== + + + Test the input arguments. + (in case DLASQ2 is not called by DLASQ1) +*/ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + *info = 0; + eps = PRECISION; + safmin = SAFEMINIMUM; + tol = eps * 100.; +/* Computing 2nd power */ + d__1 = tol; + tol2 = d__1 * d__1; + + if (*n < 0) { + *info = -1; + xerbla_("DLASQ2", &c__1); + return 0; + } else if (*n == 0) { + return 0; + } else if (*n == 1) { + +/* 1-by-1 case. */ + + if (z__[1] < 0.) { + *info = -201; + xerbla_("DLASQ2", &c__2); + } + return 0; + } else if (*n == 2) { + +/* 2-by-2 case. */ + + if (z__[2] < 0. || z__[3] < 0.) { + *info = -2; + xerbla_("DLASQ2", &c__2); + return 0; + } else if (z__[3] > z__[1]) { + d__ = z__[3]; + z__[3] = z__[1]; + z__[1] = d__; + } + z__[5] = z__[1] + z__[2] + z__[3]; + if (z__[2] > z__[3] * tol2) { + t = (z__[1] - z__[3] + z__[2]) * .5; + s = z__[3] * (z__[2] / t); + if (s <= t) { + s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) 
+ 1.))); + } else { + s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[1] + (s + z__[2]); + z__[3] *= z__[1] / t; + z__[1] = t; + } + z__[2] = z__[3]; + z__[6] = z__[2] + z__[1]; + return 0; + } + +/* Check for negative data and compute sums of q's and e's. */ + + z__[*n * 2] = 0.; + emin = z__[2]; + qmax = 0.; + zmax = 0.; + d__ = 0.; + e = 0.; + + i__1 = *n - 1 << 1; + for (k = 1; k <= i__1; k += 2) { + if (z__[k] < 0.) { + *info = -(k + 200); + xerbla_("DLASQ2", &c__2); + return 0; + } else if (z__[k + 1] < 0.) { + *info = -(k + 201); + xerbla_("DLASQ2", &c__2); + return 0; + } + d__ += z__[k]; + e += z__[k + 1]; +/* Computing MAX */ + d__1 = qmax, d__2 = z__[k]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[k + 1]; + emin = min(d__1,d__2); +/* Computing MAX */ + d__1 = max(qmax,zmax), d__2 = z__[k + 1]; + zmax = max(d__1,d__2); +/* L10: */ + } + if (z__[(*n << 1) - 1] < 0.) { + *info = -((*n << 1) + 199); + xerbla_("DLASQ2", &c__2); + return 0; + } + d__ += z__[(*n << 1) - 1]; +/* Computing MAX */ + d__1 = qmax, d__2 = z__[(*n << 1) - 1]; + qmax = max(d__1,d__2); + zmax = max(qmax,zmax); + +/* Check for diagonality. */ + + if (e == 0.) { + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 1) - 1]; +/* L20: */ + } + dlasrt_("D", n, &z__[1], &iinfo); + z__[(*n << 1) - 1] = d__; + return 0; + } + + trace = d__ + e; + +/* Check for zero data. */ + + if (trace == 0.) { + z__[(*n << 1) - 1] = 0.; + return 0; + } + +/* Check whether the machine is IEEE conformable. */ + + ieee = ilaenv_(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen) + 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "DLASQ2", "N", &c__1, &c__2, + &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1; + +/* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). 
*/ + + for (k = *n << 1; k >= 2; k += -2) { + z__[k * 2] = 0.; + z__[(k << 1) - 1] = z__[k]; + z__[(k << 1) - 2] = 0.; + z__[(k << 1) - 3] = z__[k - 1]; +/* L30: */ + } + + i0 = 1; + n0 = *n; + +/* Reverse the qd-array, if warranted. */ + + if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) { + ipn4 = i0 + n0 << 2; + i__1 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; +/* L40: */ + } + } + +/* Initial split checking via dqd and Li's test. */ + + pp = 0; + + for (k = 1; k <= 2; ++k) { + + d__ = z__[(n0 << 2) + pp - 3]; + i__1 = (i0 << 2) + pp; + for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.; + d__ = z__[i4 - 3]; + } else { + d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); + } +/* L50: */ + } + +/* dqd maps Z to ZZ plus Li's test. */ + + emin = z__[(i0 << 2) + pp + 1]; + d__ = z__[(i0 << 2) + pp - 3]; + i__1 = (n0 - 1 << 2) + pp; + for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { + z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; + if (z__[i4 - 1] <= tol2 * d__) { + z__[i4 - 1] = -0.; + z__[i4 - (pp << 1) - 2] = d__; + z__[i4 - (pp << 1)] = 0.; + d__ = z__[i4 + 1]; + } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && + safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { + temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2]; + z__[i4 - (pp << 1)] = z__[i4 - 1] * temp; + d__ *= temp; + } else { + z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - ( + pp << 1) - 2]); + d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); + } +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - (pp << 1)]; + emin = min(d__1,d__2); +/* L60: */ + } + z__[(n0 << 2) - pp - 2] = d__; + +/* Now find qmax. 
*/ + + qmax = z__[(i0 << 2) - pp - 2]; + i__1 = (n0 << 2) - pp - 2; + for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4]; + qmax = max(d__1,d__2); +/* L70: */ + } + +/* Prepare for the next iteration on K. */ + + pp = 1 - pp; +/* L80: */ + } + + iter = 2; + nfail = 0; + ndiv = n0 - i0 << 1; + + i__1 = *n + 1; + for (iwhila = 1; iwhila <= i__1; ++iwhila) { + if (n0 < 1) { + goto L150; + } + +/* + While array unfinished do + + E(N0) holds the value of SIGMA when submatrix in I0:N0 + splits from the rest of the array, but is negated. +*/ + + desig = 0.; + if (n0 == *n) { + sigma = 0.; + } else { + sigma = -z__[(n0 << 2) - 1]; + } + if (sigma < 0.) { + *info = 1; + return 0; + } + +/* + Find last unreduced submatrix's top index I0, find QMAX and + EMIN. Find Gershgorin-type bound if Q's much greater than E's. +*/ + + emax = 0.; + if (n0 > i0) { + emin = (d__1 = z__[(n0 << 2) - 5], abs(d__1)); + } else { + emin = 0.; + } + qmin = z__[(n0 << 2) - 3]; + qmax = qmin; + for (i4 = n0 << 2; i4 >= 8; i4 += -4) { + if (z__[i4 - 5] <= 0.) { + goto L100; + } + if (qmin >= emax * 4.) { +/* Computing MIN */ + d__1 = qmin, d__2 = z__[i4 - 3]; + qmin = min(d__1,d__2); +/* Computing MAX */ + d__1 = emax, d__2 = z__[i4 - 5]; + emax = max(d__1,d__2); + } +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - 5]; + emin = min(d__1,d__2); +/* L90: */ + } + i4 = 4; + +L100: + i0 = i4 / 4; + +/* Store EMIN for passing to DLASQ3. */ + + z__[(n0 << 2) - 1] = emin; + +/* + Put -(initial shift) into DMIN. + + Computing MAX +*/ + d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax); + dmin__ = -max(d__1,d__2); + +/* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. 
*/ + + pp = 0; + + nbig = (n0 - i0 + 1) * 30; + i__2 = nbig; + for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { + if (i0 > n0) { + goto L130; + } + +/* While submatrix unfinished take a good dqds step. */ + + dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & + nfail, &iter, &ndiv, &ieee); + + pp = 1 - pp; + +/* When EMIN is very small check for splits. */ + + if (pp == 0 && n0 - i0 >= 3) { + if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 * + sigma) { + splt = i0 - 1; + qmax = z__[(i0 << 2) - 3]; + emin = z__[(i0 << 2) - 1]; + oldemn = z__[i0 * 4]; + i__3 = n0 - 3 << 2; + for (i4 = i0 << 2; i4 <= i__3; i4 += 4) { + if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <= + tol2 * sigma) { + z__[i4 - 1] = -sigma; + splt = i4 / 4; + qmax = 0.; + emin = z__[i4 + 3]; + oldemn = z__[i4 + 4]; + } else { +/* Computing MAX */ + d__1 = qmax, d__2 = z__[i4 + 1]; + qmax = max(d__1,d__2); +/* Computing MIN */ + d__1 = emin, d__2 = z__[i4 - 1]; + emin = min(d__1,d__2); +/* Computing MIN */ + d__1 = oldemn, d__2 = z__[i4]; + oldemn = min(d__1,d__2); + } +/* L110: */ + } + z__[(n0 << 2) - 1] = emin; + z__[n0 * 4] = oldemn; + i0 = splt + 1; + } + } + +/* L120: */ + } + + *info = 2; + return 0; + +/* end IWHILB */ + +L130: + +/* L140: */ + ; + } + + *info = 3; + return 0; + +/* end IWHILA */ + +L150: + +/* Move q's to the front. */ + + i__1 = *n; + for (k = 2; k <= i__1; ++k) { + z__[k] = z__[(k << 2) - 3]; +/* L160: */ + } + +/* Sort and compute sum of eigenvalues. */ + + dlasrt_("D", n, &z__[1], &iinfo); + + e = 0.; + for (k = *n; k >= 1; --k) { + e += z__[k]; +/* L170: */ + } + +/* Store trace, sum(eigenvalues) and information on performance. */ + + z__[(*n << 1) + 1] = trace; + z__[(*n << 1) + 2] = e; + z__[(*n << 1) + 3] = (doublereal) iter; +/* Computing 2nd power */ + i__1 = *n; + z__[(*n << 1) + 4] = (doublereal) ndiv / (doublereal) (i__1 * i__1); + z__[(*n << 1) + 5] = nfail * 100. 
/ (doublereal) iter; + return 0; + +/* End of DLASQ2 */ + +} /* dlasq2_ */ + +/* Subroutine */ int dlasq3_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, + doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, + logical *ieee) +{ + /* Initialized data */ + + static integer ttype = 0; + static doublereal dmin1 = 0.; + static doublereal dmin2 = 0.; + static doublereal dn = 0.; + static doublereal dn1 = 0.; + static doublereal dn2 = 0.; + static doublereal tau = 0.; + + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal s, t; + static integer j4, nn; + static doublereal eps, tol; + static integer n0in, ipn4; + static doublereal tol2, temp; + extern /* Subroutine */ int dlasq4_(integer *, integer *, doublereal *, + integer *, integer *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *) + , dlasq5_(integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, logical *), dlasq6_( + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + + static doublereal safmin; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + May 17, 2000 + + + Purpose + ======= + + DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. + In case of failure it changes shifts, and tries again until output + is positive. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z holds the qd array. 
+ + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + DMIN (output) DOUBLE PRECISION + Minimum value of d. + + SIGMA (output) DOUBLE PRECISION + Sum of shifts used in current segment. + + DESIG (input/output) DOUBLE PRECISION + Lower order part of SIGMA + + QMAX (input) DOUBLE PRECISION + Maximum value of q. + + NFAIL (output) INTEGER + Number of times shift was too big. + + ITER (output) INTEGER + Number of iterations. + + NDIV (output) INTEGER + Number of divisions. + + TTYPE (output) INTEGER + Shift type. + + IEEE (input) LOGICAL + Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). + + ===================================================================== +*/ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + + n0in = *n0; + eps = PRECISION; + safmin = SAFEMINIMUM; + tol = eps * 100.; +/* Computing 2nd power */ + d__1 = tol; + tol2 = d__1 * d__1; + +/* Check for deflation. */ + +L10: + + if (*n0 < *i0) { + return 0; + } + if (*n0 == *i0) { + goto L20; + } + nn = (*n0 << 2) + *pp; + if (*n0 == *i0 + 1) { + goto L40; + } + +/* Check whether E(N0-1) is negligible, 1 eigenvalue. */ + + if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - + 4] > tol2 * z__[nn - 7]) { + goto L30; + } + +L20: + + z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma; + --(*n0); + goto L10; + +/* Check whether E(N0-2) is negligible, 2 eigenvalues. */ + +L30: + + if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[ + nn - 11]) { + goto L50; + } + +L40: + + if (z__[nn - 3] > z__[nn - 7]) { + s = z__[nn - 3]; + z__[nn - 3] = z__[nn - 7]; + z__[nn - 7] = s; + } + if (z__[nn - 5] > z__[nn - 3] * tol2) { + t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5; + s = z__[nn - 3] * (z__[nn - 5] / t); + if (s <= t) { + s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) 
+ 1.))); + } else { + s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); + } + t = z__[nn - 7] + (s + z__[nn - 5]); + z__[nn - 3] *= z__[nn - 7] / t; + z__[nn - 7] = t; + } + z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma; + z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma; + *n0 += -2; + goto L10; + +L50: + +/* Reverse the qd-array, if warranted. */ + + if (*dmin__ <= 0. || *n0 < n0in) { + if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) { + ipn4 = *i0 + *n0 << 2; + i__1 = *i0 + *n0 - 1 << 1; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + temp = z__[j4 - 3]; + z__[j4 - 3] = z__[ipn4 - j4 - 3]; + z__[ipn4 - j4 - 3] = temp; + temp = z__[j4 - 2]; + z__[j4 - 2] = z__[ipn4 - j4 - 2]; + z__[ipn4 - j4 - 2] = temp; + temp = z__[j4 - 1]; + z__[j4 - 1] = z__[ipn4 - j4 - 5]; + z__[ipn4 - j4 - 5] = temp; + temp = z__[j4]; + z__[j4] = z__[ipn4 - j4 - 4]; + z__[ipn4 - j4 - 4] = temp; +/* L60: */ + } + if (*n0 - *i0 <= 4) { + z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1]; + z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; + } +/* Computing MIN */ + d__1 = dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; + dmin2 = min(d__1,d__2); +/* Computing MIN */ + d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1] + , d__1 = min(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3]; + z__[(*n0 << 2) + *pp - 1] = min(d__1,d__2); +/* Computing MIN */ + d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 = + min(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4]; + z__[(*n0 << 2) - *pp] = min(d__1,d__2); +/* Computing MAX */ + d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = max(d__1, + d__2), d__2 = z__[(*i0 << 2) + *pp + 1]; + *qmax = max(d__1,d__2); + *dmin__ = -0.; + } + } + +/* + L70: + + Computing MIN +*/ + d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*n0 << 2) + *pp - 9], d__1 = + min(d__1,d__2), d__2 = dmin2 + z__[(*n0 << 2) - *pp]; + if (*dmin__ < 0. || safmin * *qmax < min(d__1,d__2)) { + +/* Choose a shift. 
*/ + + dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1, + &dn2, &tau, &ttype); + +/* Call dqds until DMIN > 0. */ + +L80: + + dlasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1, + &dn2, ieee); + + *ndiv += *n0 - *i0 + 2; + ++(*iter); + +/* Check status. */ + + if (*dmin__ >= 0. && dmin1 > 0.) { + +/* Success. */ + + goto L100; + + } else if (*dmin__ < 0. && dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] < + tol * (*sigma + dn1) && abs(dn) < tol * *sigma) { + +/* Convergence hidden by negative DN. */ + + z__[(*n0 - 1 << 2) - *pp + 2] = 0.; + *dmin__ = 0.; + goto L100; + } else if (*dmin__ < 0.) { + +/* TAU too big. Select new TAU and try again. */ + + ++(*nfail); + if (ttype < -22) { + +/* Failed twice. Play it safe. */ + + tau = 0.; + } else if (dmin1 > 0.) { + +/* Late failure. Gives excellent shift. */ + + tau = (tau + *dmin__) * (1. - eps * 2.); + ttype += -11; + } else { + +/* Early failure. Divide by 4. */ + + tau *= .25; + ttype += -12; + } + goto L80; + } else if (*dmin__ != *dmin__) { + +/* NaN. */ + + tau = 0.; + goto L80; + } else { + +/* Possible underflow. Play it safe. */ + + goto L90; + } + } + +/* Risk of underflow. 
*/ + +L90: + dlasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2); + *ndiv += *n0 - *i0 + 2; + ++(*iter); + tau = 0.; + +L100: + if (tau < *sigma) { + *desig += tau; + t = *sigma + *desig; + *desig -= t - *sigma; + } else { + t = *sigma + tau; + *desig = *sigma - (t - tau) + *desig; + } + *sigma = t; + + return 0; + +/* End of DLASQ3 */ + +} /* dlasq3_ */ + +/* Subroutine */ int dlasq4_(integer *i0, integer *n0, doublereal *z__, + integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, + doublereal *tau, integer *ttype) +{ + /* Initialized data */ + + static doublereal g = 0.; + + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal s, a2, b1, b2; + static integer i4, nn, np; + static doublereal gam, gap1, gap2; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASQ4 computes an approximation TAU to the smallest eigenvalue + using values of d from the previous transform. + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z holds the qd array. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + NOIN (input) INTEGER + The value of N0 at start of EIGTEST. + + DMIN (input) DOUBLE PRECISION + Minimum value of d. + + DMIN1 (input) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ). + + DMIN2 (input) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (input) DOUBLE PRECISION + d(N) + + DN1 (input) DOUBLE PRECISION + d(N-1) + + DN2 (input) DOUBLE PRECISION + d(N-2) + + TAU (output) DOUBLE PRECISION + This is the shift. + + TTYPE (output) INTEGER + Shift type. 
+ + Further Details + =============== + CNST1 = 9/16 + + ===================================================================== +*/ + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + +/* + A negative DMIN forces the shift to take that absolute value + TTYPE records the type of shift. +*/ + + if (*dmin__ <= 0.) { + *tau = -(*dmin__); + *ttype = -1; + return 0; + } + + nn = (*n0 << 2) + *pp; + if (*n0in == *n0) { + +/* No eigenvalues deflated. */ + + if (*dmin__ == *dn || *dmin__ == *dn1) { + + b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]); + b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]); + a2 = z__[nn - 7] + z__[nn - 5]; + +/* Cases 2 and 3. */ + + if (*dmin__ == *dn && *dmin1 == *dn1) { + gap2 = *dmin2 - a2 - *dmin2 * .25; + if (gap2 > 0. && gap2 > b2) { + gap1 = a2 - *dn - b2 / gap2 * b2; + } else { + gap1 = a2 - *dn - (b1 + b2); + } + if (gap1 > 0. && gap1 > b1) { +/* Computing MAX */ + d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5; + s = max(d__1,d__2); + *ttype = -2; + } else { + s = 0.; + if (*dn > b1) { + s = *dn - b1; + } + if (a2 > b1 + b2) { +/* Computing MIN */ + d__1 = s, d__2 = a2 - (b1 + b2); + s = min(d__1,d__2); + } +/* Computing MAX */ + d__1 = s, d__2 = *dmin__ * .333; + s = max(d__1,d__2); + *ttype = -3; + } + } else { + +/* Case 4. */ + + *ttype = -4; + s = *dmin__ * .25; + if (*dmin__ == *dn) { + gam = *dn; + a2 = 0.; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b2 = z__[nn - 5] / z__[nn - 7]; + np = nn - 9; + } else { + np = nn - (*pp << 1); + b2 = z__[np - 2]; + gam = *dn1; + if (z__[np - 4] > z__[np - 2]) { + return 0; + } + a2 = z__[np - 4] / z__[np - 2]; + if (z__[nn - 9] > z__[nn - 11]) { + return 0; + } + b2 = z__[nn - 9] / z__[nn - 11]; + np = nn - 13; + } + +/* Approximate contribution to norm squared from I < NN-1. */ + + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = np; i4 >= i__1; i4 += -4) { + if (b2 == 0.) 
{ + goto L20; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (max(b2,b1) * 100. < a2 || .563 < a2) { + goto L20; + } +/* L10: */ + } +L20: + a2 *= 1.05; + +/* Rayleigh quotient residual bound. */ + + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); + } + } + } else if (*dmin__ == *dn2) { + +/* Case 5. */ + + *ttype = -5; + s = *dmin__ * .25; + +/* Compute contribution to norm squared from I > NN-2. */ + + np = nn - (*pp << 1); + b1 = z__[np - 2]; + b2 = z__[np - 6]; + gam = *dn2; + if (z__[np - 8] > b2 || z__[np - 4] > b1) { + return 0; + } + a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.); + +/* Approximate contribution to norm squared from I < NN-2. */ + + if (*n0 - *i0 > 2) { + b2 = z__[nn - 13] / z__[nn - 15]; + a2 += b2; + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = nn - 17; i4 >= i__1; i4 += -4) { + if (b2 == 0.) { + goto L40; + } + b1 = b2; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b2 *= z__[i4] / z__[i4 - 2]; + a2 += b2; + if (max(b2,b1) * 100. < a2 || .563 < a2) { + goto L40; + } +/* L30: */ + } +L40: + a2 *= 1.05; + } + + if (a2 < .563) { + s = gam * (1. - sqrt(a2)) / (a2 + 1.); + } + } else { + +/* Case 6, no information to guide us. */ + + if (*ttype == -6) { + g += (1. - g) * .333; + } else if (*ttype == -18) { + g = .083250000000000005; + } else { + g = .25; + } + s = g * *dmin__; + *ttype = -6; + } + + } else if (*n0in == *n0 + 1) { + +/* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */ + + if (*dmin1 == *dn1 && *dmin2 == *dn2) { + +/* Cases 7 and 8. */ + + *ttype = -7; + s = *dmin1 * .333; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.) { + goto L60; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + a2 = b1; + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (max(b1,a2) * 100. 
< b2) { + goto L60; + } +/* L50: */ + } +L60: + b2 = sqrt(b2 * 1.05); +/* Computing 2nd power */ + d__1 = b2; + a2 = *dmin1 / (d__1 * d__1 + 1.); + gap2 = *dmin2 * .5 - a2; + if (gap2 > 0. && gap2 > b2 * a2) { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); + s = max(d__1,d__2); + } else { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = max(d__1,d__2); + *ttype = -8; + } + } else { + +/* Case 9. */ + + s = *dmin1 * .25; + if (*dmin1 == *dn1) { + s = *dmin1 * .5; + } + *ttype = -9; + } + + } else if (*n0in == *n0 + 2) { + +/* + Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. + + Cases 10 and 11. +*/ + + if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) { + *ttype = -10; + s = *dmin2 * .333; + if (z__[nn - 5] > z__[nn - 7]) { + return 0; + } + b1 = z__[nn - 5] / z__[nn - 7]; + b2 = b1; + if (b2 == 0.) { + goto L80; + } + i__1 = (*i0 << 2) - 1 + *pp; + for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) { + if (z__[i4] > z__[i4 - 2]) { + return 0; + } + b1 *= z__[i4] / z__[i4 - 2]; + b2 += b1; + if (b1 * 100. < b2) { + goto L80; + } +/* L70: */ + } +L80: + b2 = sqrt(b2 * 1.05); +/* Computing 2nd power */ + d__1 = b2; + a2 = *dmin2 / (d__1 * d__1 + 1.); + gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ + nn - 9]) - a2; + if (gap2 > 0. && gap2 > b2 * a2) { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); + s = max(d__1,d__2); + } else { +/* Computing MAX */ + d__1 = s, d__2 = a2 * (1. - b2 * 1.01); + s = max(d__1,d__2); + } + } else { + s = *dmin2 * .25; + *ttype = -11; + } + } else if (*n0in > *n0 + 2) { + +/* Case 12, more than two eigenvalues deflated. No information. 
*/ + + s = 0.; + *ttype = -12; + } + + *tau = s; + return 0; + +/* End of DLASQ4 */ + +} /* dlasq4_ */ + +/* Subroutine */ int dlasq5_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1, + doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2, + logical *ieee) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal d__; + static integer j4, j4p2; + static doublereal emin, temp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + May 17, 2000 + + + Purpose + ======= + + DLASQ5 computes one dqds transform in ping-pong form, one + version for IEEE machines another for non IEEE machines. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z holds the qd array. EMIN is stored in Z(4*N0) to avoid + an extra argument. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + TAU (input) DOUBLE PRECISION + This is the shift. + + DMIN (output) DOUBLE PRECISION + Minimum value of d. + + DMIN1 (output) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ). + + DMIN2 (output) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (output) DOUBLE PRECISION + d(N0), the last value of d. + + DNM1 (output) DOUBLE PRECISION + d(N0-1). + + DNM2 (output) DOUBLE PRECISION + d(N0-2). + + IEEE (input) LOGICAL + Flag for IEEE or non IEEE arithmetic. 
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4] - *tau; + *dmin__ = d__; + *dmin1 = -z__[j4]; + + if (*ieee) { + +/* Code for IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + temp = z__[j4 + 1] / z__[j4 - 2]; + d__ = d__ * temp - *tau; + *dmin__ = min(*dmin__,d__); + z__[j4] = z__[j4 - 1] * temp; +/* Computing MIN */ + d__1 = z__[j4]; + emin = min(d__1,emin); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + temp = z__[j4 + 2] / z__[j4 - 3]; + d__ = d__ * temp - *tau; + *dmin__ = min(*dmin__,d__); + z__[j4 - 1] = z__[j4] * temp; +/* Computing MIN */ + d__1 = z__[j4 - 1]; + emin = min(d__1,emin); +/* L20: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + *dmin__ = min(*dmin__,*dn); + + } else { + +/* Code for non IEEE arithmetic. */ + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (d__ < 0.) 
{ + return 0; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4]; + emin = min(d__1,d__2); +/* L30: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (d__ < 0.) { + return 0; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4 - 1]; + emin = min(d__1,d__2); +/* L40: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (*dnm2 < 0.) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (*dnm1 < 0.) { + return 0; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; + } + *dmin__ = min(*dmin__,*dn); + + } + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of DLASQ5 */ + +} /* dlasq5_ */ + +/* Subroutine */ int dlasq6_(integer *i0, integer *n0, doublereal *z__, + integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dnm1, doublereal *dnm2) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2; + + /* Local variables */ + static doublereal d__; + static integer j4, j4p2; + static doublereal emin, temp; + + static doublereal safmin; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + DLASQ6 computes one dqd (shift equal to zero) transform in + ping-pong form, with protection against underflow and overflow. + + Arguments + ========= + + I0 (input) INTEGER + First index. + + N0 (input) INTEGER + Last index. + + Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z holds the qd array. EMIN is stored in Z(4*N0) to avoid + an extra argument. + + PP (input) INTEGER + PP=0 for ping, PP=1 for pong. + + DMIN (output) DOUBLE PRECISION + Minimum value of d. + + DMIN1 (output) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ). + + DMIN2 (output) DOUBLE PRECISION + Minimum value of d, excluding D( N0 ) and D( N0-1 ). + + DN (output) DOUBLE PRECISION + d(N0), the last value of d. + + DNM1 (output) DOUBLE PRECISION + d(N0-1). + + DNM2 (output) DOUBLE PRECISION + d(N0-2). + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --z__; + + /* Function Body */ + if (*n0 - *i0 - 1 <= 0) { + return 0; + } + + safmin = SAFEMINIMUM; + j4 = (*i0 << 2) + *pp - 3; + emin = z__[j4 + 4]; + d__ = z__[j4]; + *dmin__ = d__; + + if (*pp == 0) { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 2] = d__ + z__[j4 - 1]; + if (z__[j4 - 2] == 0.) 
{ + z__[j4] = 0.; + d__ = z__[j4 + 1]; + *dmin__ = d__; + emin = 0.; + } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 + - 2] < z__[j4 + 1]) { + temp = z__[j4 + 1] / z__[j4 - 2]; + z__[j4] = z__[j4 - 1] * temp; + d__ *= temp; + } else { + z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); + d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4]; + emin = min(d__1,d__2); +/* L10: */ + } + } else { + i__1 = *n0 - 3 << 2; + for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { + z__[j4 - 3] = d__ + z__[j4]; + if (z__[j4 - 3] == 0.) { + z__[j4 - 1] = 0.; + d__ = z__[j4 + 2]; + *dmin__ = d__; + emin = 0.; + } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 + - 3] < z__[j4 + 2]) { + temp = z__[j4 + 2] / z__[j4 - 3]; + z__[j4 - 1] = z__[j4] * temp; + d__ *= temp; + } else { + z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); + d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); + } + *dmin__ = min(*dmin__,d__); +/* Computing MIN */ + d__1 = emin, d__2 = z__[j4 - 1]; + emin = min(d__1,d__2); +/* L20: */ + } + } + +/* Unroll last two steps. */ + + *dnm2 = d__; + *dmin2 = *dmin__; + j4 = (*n0 - 2 << 2) - *pp; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm2 + z__[j4p2]; + if (z__[j4 - 2] == 0.) { + z__[j4] = 0.; + *dnm1 = z__[j4p2 + 2]; + *dmin__ = *dnm1; + emin = 0.; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dnm1 = *dnm2 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,*dnm1); + + *dmin1 = *dmin__; + j4 += 4; + j4p2 = j4 + (*pp << 1) - 1; + z__[j4 - 2] = *dnm1 + z__[j4p2]; + if (z__[j4 - 2] == 0.) 
{ + z__[j4] = 0.; + *dn = z__[j4p2 + 2]; + *dmin__ = *dn; + emin = 0.; + } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < + z__[j4p2 + 2]) { + temp = z__[j4p2 + 2] / z__[j4 - 2]; + z__[j4] = z__[j4p2] * temp; + *dn = *dnm1 * temp; + } else { + z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); + *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); + } + *dmin__ = min(*dmin__,*dn); + + z__[j4 + 2] = *dn; + z__[(*n0 << 2) - *pp] = emin; + return 0; + +/* End of DLASQ6 */ + +} /* dlasq6_ */ + +/* Subroutine */ int dlasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * + lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, info; + static doublereal temp; + extern logical lsame_(char *, char *); + static doublereal ctemp, stemp; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLASR performs the transformation + + A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) + + A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) + + where A is an m by n real matrix and P is an orthogonal matrix, + consisting of a sequence of plane rotations determined by the + parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' + and z = n when SIDE = 'R' or 'r' ): + + When DIRECT = 'F' or 'f' ( Forward sequence ) then + + P = P( z - 1 )*...*P( 2 )*P( 1 ), + + and when DIRECT = 'B' or 'b' ( Backward sequence ) then + + P = P( 1 )*P( 2 )*...*P( z - 1 ), + + where P( k ) is a plane rotation matrix for the following planes: + + when PIVOT = 'V' or 'v' ( Variable pivot ), + the plane ( k, k + 1 ) + + when PIVOT = 'T' or 't' ( Top pivot ), + the plane ( 1, k + 1 ) + + when PIVOT = 'B' or 'b' ( Bottom pivot ), + the plane ( k, z ) + + c( k ) and s( k ) must contain the cosine and sine that define the + matrix P( k ). The two by two plane rotation part of the matrix + P( k ), R( k ), is assumed to be of the form + + R( k ) = ( c( k ) s( k ) ). + ( -s( k ) c( k ) ) + + This version vectorises across rows of the array A when SIDE = 'L'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + Specifies whether the plane rotation matrix P is applied to + A on the left or the right. + = 'L': Left, compute A := P*A + = 'R': Right, compute A:= A*P' + + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) + = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) + + PIVOT (input) CHARACTER*1 + Specifies the plane for which P(k) is a plane rotation + matrix. 
+ = 'V': Variable pivot, the plane (k,k+1) + = 'T': Top pivot, the plane (1,k+1) + = 'B': Bottom pivot, the plane (k,z) + + M (input) INTEGER + The number of rows of the matrix A. If m <= 1, an immediate + return is effected. + + N (input) INTEGER + The number of columns of the matrix A. If n <= 1, an + immediate return is effected. + + C, S (input) DOUBLE PRECISION arrays, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + c(k) and s(k) contain the cosine and sine that define the + matrix P(k). The two by two plane rotation part of the + matrix P(k), R(k), is assumed to be of the form + R( k ) = ( c( k ) s( k ) ). + ( -s( k ) c( k ) ) + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P' if SIDE = 'L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + --c__; + --s; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + info = 0; + if (! (lsame_(side, "L") || lsame_(side, "R"))) { + info = 1; + } else if (! (lsame_(pivot, "V") || lsame_(pivot, + "T") || lsame_(pivot, "B"))) { + info = 2; + } else if (! (lsame_(direct, "F") || lsame_(direct, + "B"))) { + info = 3; + } else if (*m < 0) { + info = 4; + } else if (*n < 0) { + info = 5; + } else if (*lda < max(1,*m)) { + info = 9; + } + if (info != 0) { + xerbla_("DLASR ", &info); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form P * A */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) 
{ + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L10: */ + } + } +/* L20: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + 1 + i__ * a_dim1]; + a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp * + a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j + + i__ * a_dim1]; +/* L30: */ + } + } +/* L40: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *m; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L50: */ + } + } +/* L60: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = ctemp * temp - stemp * a[ + i__ * a_dim1 + 1]; + a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[ + i__ * a_dim1 + 1]; +/* L70: */ + } + } +/* L80: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *m - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) 
{ + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L90: */ + } + } +/* L100: */ + } + } else if (lsame_(direct, "B")) { + for (j = *m - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[j + i__ * a_dim1]; + a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1] + + ctemp * temp; + a[*m + i__ * a_dim1] = ctemp * a[*m + i__ * + a_dim1] - stemp * temp; +/* L110: */ + } + } +/* L120: */ + } + } + } + } else if (lsame_(side, "R")) { + +/* Form A * P' */ + + if (lsame_(pivot, "V")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L130: */ + } + } +/* L140: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + (j + 1) * a_dim1]; + a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp * + a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * temp + ctemp * a[ + i__ + j * a_dim1]; +/* L150: */ + } + } +/* L160: */ + } + } + } else if (lsame_(pivot, "T")) { + if (lsame_(direct, "F")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) 
{ + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L170: */ + } + } +/* L180: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n; j >= 2; --j) { + ctemp = c__[j - 1]; + stemp = s[j - 1]; + if (ctemp != 1. || stemp != 0.) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = ctemp * temp - stemp * a[ + i__ + a_dim1]; + a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ + + a_dim1]; +/* L190: */ + } + } +/* L200: */ + } + } + } else if (lsame_(pivot, "B")) { + if (lsame_(direct, "F")) { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L210: */ + } + } +/* L220: */ + } + } else if (lsame_(direct, "B")) { + for (j = *n - 1; j >= 1; --j) { + ctemp = c__[j]; + stemp = s[j]; + if (ctemp != 1. || stemp != 0.) 
{ + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = a[i__ + j * a_dim1]; + a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1] + + ctemp * temp; + a[i__ + *n * a_dim1] = ctemp * a[i__ + *n * + a_dim1] - stemp * temp; +/* L230: */ + } + } +/* L240: */ + } + } + } + } + + return 0; + +/* End of DLASR */ + +} /* dlasr_ */ + +/* Subroutine */ int dlasrt_(char *id, integer *n, doublereal *d__, integer * + info) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, j; + static doublereal d1, d2, d3; + static integer dir; + static doublereal tmp; + static integer endd; + extern logical lsame_(char *, char *); + static integer stack[64] /* was [2][32] */; + static doublereal dmnmx; + static integer start; + extern /* Subroutine */ int xerbla_(char *, integer *); + static integer stkpnt; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + Sort the numbers in D in increasing order (if ID = 'I') or + in decreasing order (if ID = 'D' ). + + Use Quick Sort, reverting to Insertion sort on arrays of + size <= 20. Dimension of STACK limits N to about 2**32. + + Arguments + ========= + + ID (input) CHARACTER*1 + = 'I': sort D in increasing order; + = 'D': sort D in decreasing order. + + N (input) INTEGER + The length of the array D. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the array to be sorted. + On exit, D has been sorted into increasing order + (D(1) <= ... <= D(N) ) or into decreasing order + (D(1) >= ... >= D(N) ), depending on ID. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input paramters. 
+*/ + + /* Parameter adjustments */ + --d__; + + /* Function Body */ + *info = 0; + dir = -1; + if (lsame_(id, "D")) { + dir = 0; + } else if (lsame_(id, "I")) { + dir = 1; + } + if (dir == -1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLASRT", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + + stkpnt = 1; + stack[0] = 1; + stack[1] = *n; +L10: + start = stack[(stkpnt << 1) - 2]; + endd = stack[(stkpnt << 1) - 1]; + --stkpnt; + if (endd - start <= 20 && endd - start > 0) { + +/* Do Insertion sort on D( START:ENDD ) */ + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] > d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L30; + } +/* L20: */ + } +L30: + ; + } + + } else { + +/* Sort into increasing order */ + + i__1 = endd; + for (i__ = start + 1; i__ <= i__1; ++i__) { + i__2 = start + 1; + for (j = i__; j >= i__2; --j) { + if (d__[j] < d__[j - 1]) { + dmnmx = d__[j]; + d__[j] = d__[j - 1]; + d__[j - 1] = dmnmx; + } else { + goto L50; + } +/* L40: */ + } +L50: + ; + } + + } + + } else if (endd - start > 20) { + +/* + Partition D( START:ENDD ) and stack parts, largest one first + + Choose partition entry as median of 3 +*/ + + d1 = d__[start]; + d2 = d__[endd]; + i__ = (start + endd) / 2; + d3 = d__[i__]; + if (d1 < d2) { + if (d3 < d1) { + dmnmx = d1; + } else if (d3 < d2) { + dmnmx = d3; + } else { + dmnmx = d2; + } + } else { + if (d3 < d2) { + dmnmx = d2; + } else if (d3 < d1) { + dmnmx = d3; + } else { + dmnmx = d1; + } + } + + if (dir == 0) { + +/* Sort into decreasing order */ + + i__ = start - 1; + j = endd + 1; +L60: +L70: + --j; + if (d__[j] < dmnmx) { + goto L70; + } +L80: + ++i__; + if (d__[i__] > dmnmx) { + goto L80; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = 
d__[j]; + d__[j] = tmp; + goto L60; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } else { + +/* Sort into increasing order */ + + i__ = start - 1; + j = endd + 1; +L90: +L100: + --j; + if (d__[j] > dmnmx) { + goto L100; + } +L110: + ++i__; + if (d__[i__] < dmnmx) { + goto L110; + } + if (i__ < j) { + tmp = d__[i__]; + d__[i__] = d__[j]; + d__[j] = tmp; + goto L90; + } + if (j - start > endd - j - 1) { + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + } else { + ++stkpnt; + stack[(stkpnt << 1) - 2] = j + 1; + stack[(stkpnt << 1) - 1] = endd; + ++stkpnt; + stack[(stkpnt << 1) - 2] = start; + stack[(stkpnt << 1) - 1] = j; + } + } + } + if (stkpnt > 0) { + goto L10; + } + return 0; + +/* End of DLASRT */ + +} /* dlasrt_ */ + +/* Subroutine */ int dlassq_(integer *n, doublereal *x, integer *incx, + doublereal *scale, doublereal *sumsq) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer ix; + static doublereal absxi; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASSQ returns the values scl and smsq such that + + ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, + + where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is + assumed to be non-negative and scl returns the value + + scl = max( scale, abs( x( i ) ) ). 
+ + scale and sumsq must be supplied in SCALE and SUMSQ and + scl and smsq are overwritten on SCALE and SUMSQ respectively. + + The routine makes only one pass through the vector x. + + Arguments + ========= + + N (input) INTEGER + The number of elements to be used from the vector X. + + X (input) DOUBLE PRECISION array, dimension (N) + The vector for which a scaled sum of squares is computed. + x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. + + INCX (input) INTEGER + The increment between successive values of the vector X. + INCX > 0. + + SCALE (input/output) DOUBLE PRECISION + On entry, the value scale in the equation above. + On exit, SCALE is overwritten with scl , the scaling factor + for the sum of squares. + + SUMSQ (input/output) DOUBLE PRECISION + On entry, the value sumsq in the equation above. + On exit, SUMSQ is overwritten with smsq , the basic sum of + squares from which scl has been factored out. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n > 0) { + i__1 = (*n - 1) * *incx + 1; + i__2 = *incx; + for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { + if (x[ix] != 0.) 
{ + absxi = (d__1 = x[ix], abs(d__1)); + if (*scale < absxi) { +/* Computing 2nd power */ + d__1 = *scale / absxi; + *sumsq = *sumsq * (d__1 * d__1) + 1; + *scale = absxi; + } else { +/* Computing 2nd power */ + d__1 = absxi / *scale; + *sumsq += d__1 * d__1; + } + } +/* L10: */ + } + } + return 0; + +/* End of DLASSQ */ + +} /* dlassq_ */ + +/* Subroutine */ int dlasv2_(doublereal *f, doublereal *g, doublereal *h__, + doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * + csr, doublereal *snl, doublereal *csl) +{ + /* System generated locals */ + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static doublereal a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, + clt, crt, slt, srt; + static integer pmax; + static doublereal temp; + static logical swap; + static doublereal tsign; + + static logical gasmal; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLASV2 computes the singular value decomposition of a 2-by-2 + triangular matrix + [ F G ] + [ 0 H ]. + On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the + smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and + right singular vectors for abs(SSMAX), giving the decomposition + + [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] + [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. + + Arguments + ========= + + F (input) DOUBLE PRECISION + The (1,1) element of the 2-by-2 matrix. + + G (input) DOUBLE PRECISION + The (1,2) element of the 2-by-2 matrix. + + H (input) DOUBLE PRECISION + The (2,2) element of the 2-by-2 matrix. + + SSMIN (output) DOUBLE PRECISION + abs(SSMIN) is the smaller singular value. + + SSMAX (output) DOUBLE PRECISION + abs(SSMAX) is the larger singular value. 
+ + SNL (output) DOUBLE PRECISION + CSL (output) DOUBLE PRECISION + The vector (CSL, SNL) is a unit left singular vector for the + singular value abs(SSMAX). + + SNR (output) DOUBLE PRECISION + CSR (output) DOUBLE PRECISION + The vector (CSR, SNR) is a unit right singular vector for the + singular value abs(SSMAX). + + Further Details + =============== + + Any input parameter may be aliased with any output parameter. + + Barring over/underflow and assuming a guard digit in subtraction, all + output quantities are correct to within a few units in the last + place (ulps). + + In IEEE arithmetic, the code works correctly if one matrix element is + infinite. + + Overflow will not occur unless the largest singular value itself + overflows or is within a few ulps of overflow. (On machines with + partial overflow, like the Cray, overflow may occur if the largest + singular value is within a factor of 2 of overflow.) + + Underflow is harmless if underflow is gradual. Otherwise, results + may correspond to a matrix modified by perturbations of size near + the underflow threshold. + + ===================================================================== +*/ + + + ft = *f; + fa = abs(ft); + ht = *h__; + ha = abs(*h__); + +/* + PMAX points to the maximum absolute element of matrix + PMAX = 1 if F largest in absolute values + PMAX = 2 if G largest in absolute values + PMAX = 3 if H largest in absolute values +*/ + + pmax = 1; + swap = ha > fa; + if (swap) { + pmax = 3; + temp = ft; + ft = ht; + ht = temp; + temp = fa; + fa = ha; + ha = temp; + +/* Now FA .ge. HA */ + + } + gt = *g; + ga = abs(gt); + if (ga == 0.) { + +/* Diagonal matrix */ + + *ssmin = ha; + *ssmax = fa; + clt = 1.; + crt = 1.; + slt = 0.; + srt = 0.; + } else { + gasmal = TRUE_; + if (ga > fa) { + pmax = 2; + if (fa / ga < EPSILON) { + +/* Case of very large GA */ + + gasmal = FALSE_; + *ssmax = ga; + if (ha > 1.) 
{ + *ssmin = fa / (ga / ha); + } else { + *ssmin = fa / ga * ha; + } + clt = 1.; + slt = ht / gt; + srt = 1.; + crt = ft / gt; + } + } + if (gasmal) { + +/* Normal case */ + + d__ = fa - ha; + if (d__ == fa) { + +/* Copes with infinite F or H */ + + l = 1.; + } else { + l = d__ / fa; + } + +/* Note that 0 .le. L .le. 1 */ + + m = gt / ft; + +/* Note that abs(M) .le. 1/macheps */ + + t = 2. - l; + +/* Note that T .ge. 1 */ + + mm = m * m; + tt = t * t; + s = sqrt(tt + mm); + +/* Note that 1 .le. S .le. 1 + 1/macheps */ + + if (l == 0.) { + r__ = abs(m); + } else { + r__ = sqrt(l * l + mm); + } + +/* Note that 0 .le. R .le. 1 + 1/macheps */ + + a = (s + r__) * .5; + +/* Note that 1 .le. A .le. 1 + abs(M) */ + + *ssmin = ha / a; + *ssmax = fa * a; + if (mm == 0.) { + +/* Note that M is very tiny */ + + if (l == 0.) { + t = d_sign(&c_b5242, &ft) * d_sign(&c_b2453, >); + } else { + t = gt / d_sign(&d__, &ft) + m / t; + } + } else { + t = (m / (s + t) + m / (r__ + l)) * (a + 1.); + } + l = sqrt(t * t + 4.); + crt = 2. 
/ l; + srt = t / l; + clt = (crt + srt * m) / a; + slt = ht / ft * srt / a; + } + } + if (swap) { + *csl = srt; + *snl = crt; + *csr = slt; + *snr = clt; + } else { + *csl = clt; + *snl = slt; + *csr = crt; + *snr = srt; + } + +/* Correct signs of SSMAX and SSMIN */ + + if (pmax == 1) { + tsign = d_sign(&c_b2453, csr) * d_sign(&c_b2453, csl) * d_sign(& + c_b2453, f); + } + if (pmax == 2) { + tsign = d_sign(&c_b2453, snr) * d_sign(&c_b2453, csl) * d_sign(& + c_b2453, g); + } + if (pmax == 3) { + tsign = d_sign(&c_b2453, snr) * d_sign(&c_b2453, snl) * d_sign(& + c_b2453, h__); + } + *ssmax = d_sign(ssmax, &tsign); + d__1 = tsign * d_sign(&c_b2453, f) * d_sign(&c_b2453, h__); + *ssmin = d_sign(ssmin, &d__1); + return 0; + +/* End of DLASV2 */ + +} /* dlasv2_ */ + +/* Subroutine */ int dlaswp_(integer *n, doublereal *a, integer *lda, integer + *k1, integer *k2, integer *ipiv, integer *incx) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; + static doublereal temp; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DLASWP performs a series of row interchanges on the matrix A. + One row interchange is initiated for each of rows K1 through K2 of A. + + Arguments + ========= + + N (input) INTEGER + The number of columns of the matrix A. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the matrix of column dimension N to which the row + interchanges will be applied. + On exit, the permuted matrix. + + LDA (input) INTEGER + The leading dimension of the array A. + + K1 (input) INTEGER + The first element of IPIV for which a row interchange will + be done. + + K2 (input) INTEGER + The last element of IPIV for which a row interchange will + be done. 
+ + IPIV (input) INTEGER array, dimension (M*abs(INCX)) + The vector of pivot indices. Only the elements in positions + K1 through K2 of IPIV are accessed. + IPIV(K) = L implies rows K and L are to be interchanged. + + INCX (input) INTEGER + The increment between successive values of IPIV. If IPIV + is negative, the pivots are applied in reverse order. + + Further Details + =============== + + Modified by + R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA + + ===================================================================== + + + Interchange row I with row IPIV(I) for each of rows K1 through K2. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + if (*incx > 0) { + ix0 = *k1; + i1 = *k1; + i2 = *k2; + inc = 1; + } else if (*incx < 0) { + ix0 = (1 - *k2) * *incx + 1; + i1 = *k2; + i2 = *k1; + inc = -1; + } else { + return 0; + } + + n32 = *n / 32 << 5; + if (n32 != 0) { + i__1 = n32; + for (j = 1; j <= i__1; j += 32) { + ix = ix0; + i__2 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) + { + ip = ipiv[ix]; + if (ip != i__) { + i__4 = j + 31; + for (k = j; k <= i__4; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L10: */ + } + } + ix += *incx; +/* L20: */ + } +/* L30: */ + } + } + if (n32 != *n) { + ++n32; + ix = ix0; + i__1 = i2; + i__3 = inc; + for (i__ = i1; i__3 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__3) { + ip = ipiv[ix]; + if (ip != i__) { + i__2 = *n; + for (k = n32; k <= i__2; ++k) { + temp = a[i__ + k * a_dim1]; + a[i__ + k * a_dim1] = a[ip + k * a_dim1]; + a[ip + k * a_dim1] = temp; +/* L40: */ + } + } + ix += *incx; +/* L50: */ + } + } + + return 0; + +/* End of DLASWP */ + +} /* dlaswp_ */ + +/* Subroutine */ int dlatrd_(char *uplo, integer *n, integer *nb, doublereal * + a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, + integer *ldw) +{ + /* System generated locals */ + integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, iw; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal alpha; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), daxpy_(integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *), + dsymv_(char *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), dlarfg_(integer *, doublereal *, doublereal *, integer *, + doublereal *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DLATRD reduces NB rows and columns of a real symmetric matrix A to + symmetric tridiagonal form by an orthogonal similarity + transformation Q' * A * Q, and returns the matrices V and W which are + needed to apply the transformation to the unreduced part of A. 
+ + If UPLO = 'U', DLATRD reduces the last NB rows and columns of a + matrix, of which the upper triangle is supplied; + if UPLO = 'L', DLATRD reduces the first NB rows and columns of a + matrix, of which the lower triangle is supplied. + + This is an auxiliary routine called by DSYTRD. + + Arguments + ========= + + UPLO (input) CHARACTER + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. + + NB (input) INTEGER + The number of rows and columns to be reduced. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit: + if UPLO = 'U', the last NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements above the diagonal + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors; + if UPLO = 'L', the first NB columns have been reduced to + tridiagonal form, with the diagonal elements overwriting + the diagonal elements of A; the elements below the diagonal + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= (1,N). 
+ + E (output) DOUBLE PRECISION array, dimension (N-1) + If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal + elements of the last NB columns of the reduced matrix; + if UPLO = 'L', E(1:nb) contains the subdiagonal elements of + the first NB columns of the reduced matrix. + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors, stored in + TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. + See Further Details. + + W (output) DOUBLE PRECISION array, dimension (LDW,NB) + The n-by-nb matrix W required to update the unreduced part + of A. + + LDW (input) INTEGER + The leading dimension of the array W. LDW >= max(1,N). + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n) H(n-1) . . . H(n-nb+1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), + and tau in TAU(i-1). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), + and tau in TAU(i). + + The elements of the vectors v together form the n-by-nb matrix V + which is needed, with W, to apply the transformation to the unreduced + part of the matrix, using a symmetric rank-2k update of the form: + A := A - V*W' - W*V'. 
+ + The contents of A on exit are illustrated by the following examples + with n = 5 and nb = 2: + + if UPLO = 'U': if UPLO = 'L': + + ( a a a v4 v5 ) ( d ) + ( a a v4 v5 ) ( 1 d ) + ( a 1 v5 ) ( v1 1 a ) + ( d 1 ) ( v1 v2 a a ) + ( d ) ( v1 v2 a a a ) + + where d denotes a diagonal element of the reduced matrix, a denotes + an element of the original matrix that is unchanged, and vi denotes + an element of the vector defining H(i). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --e; + --tau; + w_dim1 = *ldw; + w_offset = 1 + w_dim1; + w -= w_offset; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + + if (lsame_(uplo, "U")) { + +/* Reduce last NB columns of upper triangle */ + + i__1 = *n - *nb + 1; + for (i__ = *n; i__ >= i__1; --i__) { + iw = i__ - *n + *nb; + if (i__ < *n) { + +/* Update A(1:i,i) */ + + i__2 = *n - i__; + dgemv_("No transpose", &i__, &i__2, &c_b2589, &a[(i__ + 1) * + a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & + c_b2453, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + dgemv_("No transpose", &i__, &i__2, &c_b2589, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b2453, &a[i__ * a_dim1 + 1], &c__1); + } + if (i__ > 1) { + +/* + Generate elementary reflector H(i) to annihilate + A(1:i-2,i) +*/ + + i__2 = i__ - 1; + dlarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + + 1], &c__1, &tau[i__ - 1]); + e[i__ - 1] = a[i__ - 1 + i__ * a_dim1]; + a[i__ - 1 + i__ * a_dim1] = 1.; + +/* Compute W(1:i-1,i) */ + + i__2 = i__ - 1; + dsymv_("Upper", &i__2, &c_b2453, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b2467, &w[iw * w_dim1 + 1], & + c__1); + if (i__ < *n) { + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b2453, &w[(iw + 1) * + w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, & + c_b2467, &w[i__ + 1 + iw * w_dim1], 
&c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[(i__ + + 1) * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], + &c__1, &c_b2453, &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & + c_b2467, &w[i__ + 1 + iw * w_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &w[(iw + 1) + * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b2453, &w[iw * w_dim1 + 1], &c__1); + } + i__2 = i__ - 1; + dscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + i__2 = i__ - 1; + alpha = tau[i__ - 1] * -.5 * ddot_(&i__2, &w[iw * w_dim1 + 1], + &c__1, &a[i__ * a_dim1 + 1], &c__1); + i__2 = i__ - 1; + daxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + w_dim1 + 1], &c__1); + } + +/* L10: */ + } + } else { + +/* Reduce first NB columns of lower triangle */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:n,i) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[i__ + a_dim1], + lda, &w[i__ + w_dim1], ldw, &c_b2453, &a[i__ + i__ * + a_dim1], &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &w[i__ + w_dim1], + ldw, &a[i__ + a_dim1], lda, &c_b2453, &a[i__ + i__ * + a_dim1], &c__1); + if (i__ < *n) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:n,i) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + + i__ * a_dim1], &c__1, &tau[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute W(i+1:n,i) */ + + i__2 = *n - i__; + dsymv_("Lower", &i__2, &c_b2453, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b2467, &w[i__ + 1 + i__ * w_dim1], &c__1) + ; + i__2 = *n - i__; + i__3 = i__ - 
1; + dgemv_("Transpose", &i__2, &i__3, &c_b2453, &w[i__ + 1 + + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b2467, &w[i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b2453, & + w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[i__ + 1 + + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b2467, &w[i__ * w_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b2453, & + w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + dscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + i__2 = *n - i__; + alpha = tau[i__] * -.5 * ddot_(&i__2, &w[i__ + 1 + i__ * + w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + i__ + 1 + i__ * w_dim1], &c__1); + } + +/* L20: */ + } + } + + return 0; + +/* End of DLATRD */ + +} /* dlatrd_ */ + +/* Subroutine */ int dlauu2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static doublereal aii; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLAUU2 computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the unblocked form of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAUU2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the product U * U'. 
*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1], + lda, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_("No transpose", &i__2, &i__3, &c_b2453, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + aii, &a[i__ * a_dim1 + 1], &c__1); + } else { + dscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } +/* L10: */ + } + + } else { + +/* Compute the product L' * L. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1], & + c__1, &a[i__ + i__ * a_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_("Transpose", &i__2, &i__3, &c_b2453, &a[i__ + 1 + + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, + &a[i__ + a_dim1], lda); + } else { + dscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } +/* L20: */ + } + } + + return 0; + +/* End of DLAUU2 */ + +} /* dlauu2_ */ + +/* Subroutine */ int dlauum_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, ib, nb; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), dlauu2_(char *, integer *, + doublereal *, integer *, integer *), 
xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DLAUUM computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the blocked form of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! 
lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLAUUM", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "DLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + dlauu2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + dtrmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, + &c_b2453, &a[i__ + i__ * a_dim1], lda, &a[i__ * + a_dim1 + 1], lda); + dlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + dgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, & + c_b2453, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + + (i__ + ib) * a_dim1], lda, &c_b2453, &a[i__ * + a_dim1 + 1], lda); + i__3 = *n - i__ - ib + 1; + dsyrk_("Upper", "No transpose", &ib, &i__3, &c_b2453, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b2453, &a[i__ + + i__ * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the product L' * L. */ + + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + dtrmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, & + c_b2453, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1] + , lda); + dlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + dgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, & + c_b2453, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + + ib + a_dim1], lda, &c_b2453, &a[i__ + a_dim1], + lda); + i__3 = *n - i__ - ib + 1; + dsyrk_("Lower", "Transpose", &ib, &i__3, &c_b2453, &a[i__ + + ib + i__ * a_dim1], lda, &c_b2453, &a[i__ + i__ + * a_dim1], lda); + } +/* L20: */ + } + } + } + + return 0; + +/* End of DLAUUM */ + +} /* dlauum_ */ + +/* Subroutine */ int dorg2r_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *), dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DORG2R generates an m by n real matrix Q with orthonormal columns, + which is defined as the first n columns of a product of k elementary + reflectors of order m + + Q = H(1) H(2) . . . H(k) + + as returned by DGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. 
+ + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by DGEQRF in the first k columns of its array + argument A. + On exit, the m-by-n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQRF. + + WORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORG2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + +/* Initialise columns k+1:n to columns of the unit matrix */ + + i__1 = *n; + for (j = *k + 1; j <= i__1; ++j) { + i__2 = *m; + for (l = 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.; +/* L10: */ + } + a[j + j * a_dim1] = 1.; +/* L20: */ + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the left */ + + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.; + i__1 = *m - i__ + 1; + i__2 = *n - i__; + dlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } + if (i__ < *m) { + i__1 = *m - i__; + d__1 = -tau[i__]; + 
dscal_(&i__1, &d__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + } + a[i__ + i__ * a_dim1] = 1. - tau[i__]; + +/* Set A(1:i-1,i) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[l + i__ * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of DORG2R */ + +} /* dorg2r_ */ + +/* Subroutine */ int dorgbr_(char *vect, integer *m, integer *n, integer *k, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, mn; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dorglq_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dorgqr_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORGBR generates one of the real orthogonal matrices Q or P**T + determined by DGEBRD when reducing a real matrix A to bidiagonal + form: A = Q * B * P**T. Q and P**T are defined as products of + elementary reflectors H(i) or G(i) respectively. + + If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q + is of order M: + if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n + columns of Q, where m >= n >= k; + if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an + M-by-M matrix. 
+ + If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T + is of order N: + if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m + rows of P**T, where n >= m >= k; + if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as + an N-by-N matrix. + + Arguments + ========= + + VECT (input) CHARACTER*1 + Specifies whether the matrix Q or the matrix P**T is + required, as defined in the transformation applied by DGEBRD: + = 'Q': generate Q; + = 'P': generate P**T. + + M (input) INTEGER + The number of rows of the matrix Q or P**T to be returned. + M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q or P**T to be returned. + N >= 0. + If VECT = 'Q', M >= N >= min(M,K); + if VECT = 'P', N >= M >= min(N,K). + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original M-by-K + matrix reduced by DGEBRD. + If VECT = 'P', the number of rows in the original K-by-N + matrix reduced by DGEBRD. + K >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by DGEBRD. + On exit, the M-by-N matrix Q or P**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension + (min(M,K)) if VECT = 'Q' + (min(N,K)) if VECT = 'P' + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i), which determines Q or P**T, as + returned by DGEBRD in its array argument TAUQ or TAUP. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,min(M,N)). + For optimum performance LWORK >= min(M,N)*NB, where NB + is the optimal blocksize. 
+ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(vect, "Q"); + mn = min(*m,*n); + lquery = *lwork == -1; + if (! wantq && ! lsame_(vect, "P")) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( + *m > *n || *m < min(*n,*k))) { + *info = -3; + } else if (*k < 0) { + *info = -4; + } else if (*lda < max(1,*m)) { + *info = -6; + } else if (*lwork < max(1,mn) && ! 
lquery) { + *info = -9; + } + + if (*info == 0) { + if (wantq) { + nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } else { + nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( + ftnlen)1); + } + lwkopt = max(1,mn) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + work[1] = 1.; + return 0; + } + + if (wantq) { + +/* + Form Q, determined by a call to DGEBRD to reduce an m-by-k + matrix +*/ + + if (*m >= *k) { + +/* If m >= k, assume m >= n >= k */ + + dorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If m < k, assume m = n + + Shift the vectors which define the elementary reflectors one + column to the right, and set the first row and column of Q + to those of the unit matrix +*/ + + for (j = *m; j >= 2; --j) { + a[j * a_dim1 + 1] = 0.; + i__1 = *m; + for (i__ = j + 1; i__ <= i__1; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L10: */ + } +/* L20: */ + } + a[a_dim1 + 1] = 1.; + i__1 = *m; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.; +/* L30: */ + } + if (*m > 1) { + +/* Form Q(2:m,2:m) */ + + i__1 = *m - 1; + i__2 = *m - 1; + i__3 = *m - 1; + dorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } else { + +/* + Form P', determined by a call to DGEBRD to reduce a k-by-n + matrix +*/ + + if (*k < *n) { + +/* If k < n, assume k <= m <= n */ + + dorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + iinfo); + + } else { + +/* + If k >= n, assume m = n + + Shift the vectors which define the elementary reflectors one + row downward, and set the first row and column of P' to + those of the unit matrix +*/ + + a[a_dim1 + 1] = 1.; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + a[i__ + a_dim1] = 0.; +/* L40: */ 
+ } + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + for (i__ = j - 1; i__ >= 2; --i__) { + a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1]; +/* L50: */ + } + a[j * a_dim1 + 1] = 0.; +/* L60: */ + } + if (*n > 1) { + +/* Form P'(2:n,2:n) */ + + i__1 = *n - 1; + i__2 = *n - 1; + i__3 = *n - 1; + dorglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + 1], &work[1], lwork, &iinfo); + } + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORGBR */ + +} /* dorgbr_ */ + +/* Subroutine */ int dorghr_(integer *n, integer *ilo, integer *ihi, + doublereal *a, integer *lda, doublereal *tau, doublereal *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, nb, nh, iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dorgqr_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORGHR generates a real orthogonal matrix Q which is defined as the + product of IHI-ILO elementary reflectors of order N, as returned by + DGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + N (input) INTEGER + The order of the matrix Q. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of DGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. 
+ + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the vectors which define the elementary reflectors, + as returned by DGEHRD. + On exit, the N-by-N orthogonal matrix Q. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (N-1) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEHRD. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= IHI-ILO. + For optimum performance LWORK >= (IHI-ILO)*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,nh) && ! 
lquery) { + *info = -8; + } + + if (*info == 0) { + nb = ilaenv_(&c__1, "DORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( + ftnlen)1); + lwkopt = max(1,nh) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGHR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + +/* + Shift the vectors which define the elementary reflectors one + column to the right, and set the first ilo and the last n-ihi + rows and columns to those of the unit matrix +*/ + + i__1 = *ilo + 1; + for (j = *ihi; j >= i__1; --j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } + i__2 = *ihi; + for (i__ = j + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; +/* L20: */ + } + i__2 = *n; + for (i__ = *ihi + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + i__1 = *ilo; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L50: */ + } + a[j + j * a_dim1] = 1.; +/* L60: */ + } + i__1 = *n; + for (j = *ihi + 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L70: */ + } + a[j + j * a_dim1] = 1.; +/* L80: */ + } + + if (nh > 0) { + +/* Generate Q(ilo+1:ihi,ilo+1:ihi) */ + + dorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* + ilo], &work[1], lwork, &iinfo); + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORGHR */ + +} /* dorghr_ */ + +/* Subroutine */ int dorgl2_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer 
*), dlarf_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORGL2 generates an m by n real matrix Q with orthonormal rows, + which is defined as the first m rows of a product of k elementary + reflectors of order n + + Q = H(k) . . . H(2) H(1) + + as returned by DGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by DGELQF in the first k rows of its array argument A. + On exit, the m-by-n matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGELQF. 
+ + WORK (workspace) DOUBLE PRECISION array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGL2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + return 0; + } + + if (*k < *m) { + +/* Initialise rows k+1:m to rows of the unit matrix */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (l = *k + 1; l <= i__2; ++l) { + a[l + j * a_dim1] = 0.; +/* L10: */ + } + if (j > *k && j <= *m) { + a[j + j * a_dim1] = 1.; + } +/* L20: */ + } + } + + for (i__ = *k; i__ >= 1; --i__) { + +/* Apply H(i) to A(i:m,i:n) from the right */ + + if (i__ < *n) { + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.; + i__1 = *m - i__; + i__2 = *n - i__ + 1; + dlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } + i__1 = *n - i__; + d__1 = -tau[i__]; + dscal_(&i__1, &d__1, &a[i__ + (i__ + 1) * a_dim1], lda); + } + a[i__ + i__ * a_dim1] = 1. 
- tau[i__]; + +/* Set A(i,1:i-1) to zero */ + + i__1 = i__ - 1; + for (l = 1; l <= i__1; ++l) { + a[i__ + l * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } + return 0; + +/* End of DORGL2 */ + +} /* dorgl2_ */ + +/* Subroutine */ int dorglq_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dorgl2_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORGLQ generates an M-by-N real matrix Q with orthonormal rows, + which is defined as the first M rows of a product of K elementary + reflectors of order N + + Q = H(k) . . . H(2) H(1) + + as returned by DGELQF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. N >= M. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. M >= K >= 0. 
+ + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the i-th row must contain the vector which defines + the elementary reflector H(i), for i = 1,2,...,k, as returned + by DGELQF in the first k rows of its array argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGELQF. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*m) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < *m) { + *info = -2; + } else if (*k < 0 || *k > *m) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*m) && ! 
lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m <= 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "DORGLQ", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DORGLQ", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk rows are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(kk+1:m,1:kk) to zero. */ + + i__1 = kk; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = kk + 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *m) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + dorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . 
H(i+ib-1) +*/ + + i__2 = *n - i__ + 1; + dlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i+ib:m,i:n) from the right */ + + i__2 = *m - i__ - ib + 1; + i__3 = *n - i__ + 1; + dlarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & + i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } + +/* Apply H' to columns i:n of current block */ + + i__2 = *n - i__ + 1; + dorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set columns 1:i-1 of current block to zero */ + + i__2 = i__ - 1; + for (j = 1; j <= i__2; ++j) { + i__3 = i__ + ib - 1; + for (l = i__; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DORGLQ */ + +} /* dorglq_ */ + +/* Subroutine */ int dorgqr_(integer *m, integer *n, integer *k, doublereal * + a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int dorg2r_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), + dlarfb_(char *, char *, char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORGQR generates an M-by-N real matrix Q with orthonormal columns, + which is defined as the first N columns of a product of K elementary + reflectors of order M + + Q = H(1) H(2) . . . H(k) + + as returned by DGEQRF. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix Q. M >= 0. + + N (input) INTEGER + The number of columns of the matrix Q. M >= N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines the + matrix Q. N >= K >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the i-th column must contain the vector which + defines the elementary reflector H(i), for i = 1,2,...,k, as + returned by DGEQRF in the first k columns of its array + argument A. + On exit, the M-by-N matrix Q. + + LDA (input) INTEGER + The first dimension of the array A. LDA >= max(1,M). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQRF. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument has an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + lwkopt = max(1,*n) * nb; + work[1] = (doublereal) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0 || *n > *m) { + *info = -2; + } else if (*k < 0 || *k > *n) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORGQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < *k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "DORGQR", " ", m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "DORGQR", " ", m, n, k, &c_n1, + (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < *k && nx < *k) { + +/* + Use blocked code after the last block. + The first kk columns are handled by the block method. +*/ + + ki = (*k - nx - 1) / nb * nb; +/* Computing MIN */ + i__1 = *k, i__2 = ki + nb; + kk = min(i__1,i__2); + +/* Set A(1:kk,kk+1:n) to zero. 
*/ + + i__1 = *n; + for (j = kk + 1; j <= i__1; ++j) { + i__2 = kk; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = 0.; +/* L10: */ + } +/* L20: */ + } + } else { + kk = 0; + } + +/* Use unblocked code for the last or only block. */ + + if (kk < *n) { + i__1 = *m - kk; + i__2 = *n - kk; + i__3 = *k - kk; + dorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + tau[kk + 1], &work[1], &iinfo); + } + + if (kk > 0) { + +/* Use blocked code */ + + i__1 = -nb; + for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { +/* Computing MIN */ + i__2 = nb, i__3 = *k - i__ + 1; + ib = min(i__2,i__3); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__2 = *m - i__ + 1; + dlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i:m,i+ib:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__ - ib + 1; + dlarfb_("Left", "No transpose", "Forward", "Columnwise", & + i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ + 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & + work[ib + 1], &ldwork); + } + +/* Apply H to rows i:m of current block */ + + i__2 = *m - i__ + 1; + dorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + work[1], &iinfo); + +/* Set rows 1:i-1 of current block to zero */ + + i__2 = i__ + ib - 1; + for (j = i__; j <= i__2; ++j) { + i__3 = i__ - 1; + for (l = 1; l <= i__3; ++l) { + a[l + j * a_dim1] = 0.; +/* L30: */ + } +/* L40: */ + } +/* L50: */ + } + } + + work[1] = (doublereal) iws; + return 0; + +/* End of DORGQR */ + +} /* dorgqr_ */ + +/* Subroutine */ int dorm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + 
static integer i__, i1, i2, i3, mi, ni, nq; + static doublereal aii; + static logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DORM2L overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). 
+ + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQLF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORM2L", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(1:m-k+i,1:n) */ + + mi = *m - *k + i__; + } else { + +/* H(i) is applied to C(1:m,1:n-k+i) */ + + ni = *n - *k + i__; + } + +/* Apply H(i) */ + + aii = a[nq - *k + i__ + i__ * a_dim1]; + a[nq - *k + i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ + c_offset], ldc, &work[1]); + a[nq - *k + i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORM2L */ + +} /* dorm2l_ */ + +/* Subroutine */ int dorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static doublereal aii; + static logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DORM2R overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. 
+ + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQRF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). 
+ + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORM2R", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORM2R */ + +} /* dorm2r_ */ + +/* Subroutine */ int dormbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, + doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2]; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static logical notran; + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static logical applyq; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C + with + SIDE = 'L' SIDE = 'R' + TRANS = 'N': P * C C * P + TRANS = 'T': P**T * C C * P**T + + Here Q and P**T are the orthogonal matrices determined by DGEBRD when + reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and + P**T are defined as products of elementary reflectors H(i) and G(i) + respectively. + + Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the + order of the orthogonal matrix Q or P**T that is applied. + + If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: + if nq >= k, Q = H(1) H(2) . . . H(k); + if nq < k, Q = H(1) H(2) . . . H(nq-1). + + If VECT = 'P', A is assumed to have been a K-by-NQ matrix: + if k < nq, P = G(1) G(2) . . . G(k); + if k >= nq, P = G(1) G(2) . . . G(nq-1). + + Arguments + ========= + + VECT (input) CHARACTER*1 + = 'Q': apply Q or Q**T; + = 'P': apply P or P**T. + + SIDE (input) CHARACTER*1 + = 'L': apply Q, Q**T, P or P**T from the Left; + = 'R': apply Q, Q**T, P or P**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q or P; + = 'T': Transpose, apply Q**T or P**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + If VECT = 'Q', the number of columns in the original + matrix reduced by DGEBRD. + If VECT = 'P', the number of rows in the original + matrix reduced by DGEBRD. + K >= 0. 
+ + A (input) DOUBLE PRECISION array, dimension + (LDA,min(nq,K)) if VECT = 'Q' + (LDA,nq) if VECT = 'P' + The vectors which define the elementary reflectors H(i) and + G(i), whose products determine the matrices Q and P, as + returned by DGEBRD. + + LDA (input) INTEGER + The leading dimension of the array A. + If VECT = 'Q', LDA >= max(1,nq); + if VECT = 'P', LDA >= max(1,min(nq,K)). + + TAU (input) DOUBLE PRECISION array, dimension (min(nq,K)) + TAU(i) must contain the scalar factor of the elementary + reflector H(i) or G(i) which determines Q or P, as returned + by DGEBRD in the array argument TAUQ or TAUP. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q + or P*C or P**T*C or C*P or C*P**T. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + applyq = lsame_(vect, "Q"); + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q or P and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! applyq && ! lsame_(vect, "P")) { + *info = -1; + } else if (! left && ! lsame_(side, "R")) { + *info = -2; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*k < 0) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = 1, i__2 = min(nq,*k); + if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! 
lquery) { + *info = -13; + } + } + + if (*info == 0) { + if (applyq) { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "DORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "DORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORMBR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + work[1] = 1.; + if (*m == 0 || *n == 0) { + return 0; + } + + if (applyq) { + +/* Apply Q */ + + if (nq >= *k) { + +/* Q was determined by a call to DGEBRD with nq >= k */ + + dormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* Q was determined by a call to DGEBRD with nq < k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + dormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] + , &c__[i1 + 
i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + } else { + +/* Apply P */ + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + if (nq > *k) { + +/* P was determined by a call to DGEBRD with nq > k */ + + dormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], lwork, &iinfo); + } else if (nq > 1) { + +/* P was determined by a call to DGEBRD with nq <= k */ + + if (left) { + mi = *m - 1; + ni = *n; + i1 = 2; + i2 = 1; + } else { + mi = *m; + ni = *n - 1; + i1 = 1; + i2 = 2; + } + i__1 = nq - 1; + dormlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & + iinfo); + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMBR */ + +} /* dormbr_ */ + +/* Subroutine */ int dorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; + + /* Local variables */ + static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; + static doublereal aii; + static logical left; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical notran; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DORML2 overwrites the general real m by n matrix C with + + Q * C if SIDE = 'L' and TRANS = 'N', or + + Q'* C if SIDE = 'L' and TRANS = 'T', or + + C * Q if SIDE = 'R' and TRANS = 'N', or + + C * Q' if SIDE = 'R' and TRANS = 'T', + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q' from the Left + = 'R': apply Q or Q' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'T': apply Q' (Transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGELQF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). 
+ + WORK (workspace) DOUBLE PRECISION array, dimension + (N) if SIDE = 'L', + (M) if SIDE = 'R' + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + +/* NQ is the order of Q */ + + if (left) { + nq = *m; + } else { + nq = *n; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORML2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + return 0; + } + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = 1; + } else { + i1 = *k; + i2 = 1; + i3 = -1; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + if (left) { + +/* H(i) is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H(i) is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H(i) */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.; + dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ + ic + jc * c_dim1], ldc, &work[1]); + a[i__ + i__ * a_dim1] = aii; +/* L10: */ + } + return 0; + +/* End of DORML2 */ + +} /* dorml2_ */ + +/* Subroutine */ int dormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static doublereal t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int dorml2_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer ldwork; + static char transt[1]; + static integer lwkopt; + static logical lquery; + + +/* 
+ -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORMLQ overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension + (LDA,M) if SIDE = 'L', + (LDA,N) if SIDE = 'R' + The i-th row must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGELQF in the first k rows of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,K). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGELQF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). 
+ + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,*k)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. 
+ + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "DORMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORMLQ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "DORMLQ", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + if (notran) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . 
H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + dlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + dlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], + ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMLQ */ + +} /* dormlq_ */ + +/* Subroutine */ int dormql_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static doublereal t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int dorm2l_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical notran; + static integer 
ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORMQL overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(k) . . . H(2) H(1) + + as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGEQLF in the last k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQLF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). 
+ + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. 
+ + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORMQL", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQL", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + dorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && notran || ! left && ! notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + } else { + mi = *m; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i+ib-1) . . . 
H(i+1) H(i) +*/ + + i__4 = nq - *k + i__ + ib - 1; + dlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + , lda, &tau[i__], t, &c__65); + if (left) { + +/* H or H' is applied to C(1:m-k+i+ib-1,1:n) */ + + mi = *m - *k + i__ + ib - 1; + } else { + +/* H or H' is applied to C(1:m,1:n-k+i+ib-1) */ + + ni = *n - *k + i__ + ib - 1; + } + +/* Apply H or H' */ + + dlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ + i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & + work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMQL */ + +} /* dormql_ */ + +/* Subroutine */ int dormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, + i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__; + static doublereal t[4160] /* was [65][64] */; + static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; + static logical left; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + extern /* Subroutine */ int dorm2r_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *), dlarfb_(char + *, char *, char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static 
logical notran; + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORMQR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix defined as the product of k + elementary reflectors + + Q = H(1) H(2) . . . H(k) + + as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N + if SIDE = 'R'. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + K (input) INTEGER + The number of elementary reflectors whose product defines + the matrix Q. + If SIDE = 'L', M >= K >= 0; + if SIDE = 'R', N >= K >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,K) + The i-th column must contain the vector which defines the + elementary reflector H(i), for i = 1,2,...,k, as returned by + DGEQRF in the first k columns of its array argument A. + A is modified by the routine but restored on exit. + + LDA (input) INTEGER + The leading dimension of the array A. + If SIDE = 'L', LDA >= max(1,M); + if SIDE = 'R', LDA >= max(1,N). + + TAU (input) DOUBLE PRECISION array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEQRF. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). 
+ + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + notran = lsame_(trans, "N"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! notran && ! lsame_(trans, "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*k < 0 || *k > nq) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -12; + } + + if (*info == 0) { + +/* + Determine the block size. NB may be at most NBMAX, where NBMAX + is used to define the local array T. 
+ + Computing MIN + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DORMQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || *k == 0) { + work[1] = 1.; + return 0; + } + + nbmin = 2; + ldwork = nw; + if (nb > 1 && nb < *k) { + iws = nw * nb; + if (*lwork < iws) { + nb = *lwork / ldwork; +/* + Computing MAX + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQR", ch__1, m, n, k, &c_n1, ( + ftnlen)6, (ftnlen)2); + nbmin = max(i__1,i__2); + } + } else { + iws = nw; + } + + if (nb < nbmin || nb >= *k) { + +/* Use unblocked code */ + + dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + c_offset], ldc, &work[1], &iinfo); + } else { + +/* Use blocked code */ + + if (left && ! notran || ! left && notran) { + i1 = 1; + i2 = *k; + i3 = nb; + } else { + i1 = (*k - 1) / nb * nb + 1; + i2 = 1; + i3 = -nb; + } + + if (left) { + ni = *n; + jc = 1; + } else { + mi = *m; + ic = 1; + } + + i__1 = i2; + i__2 = i3; + for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *k - i__ + 1; + ib = min(i__4,i__5); + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . 
H(i+ib-1) +*/ + + i__4 = nq - i__ + 1; + dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], t, &c__65) + ; + if (left) { + +/* H or H' is applied to C(i:m,1:n) */ + + mi = *m - i__ + 1; + ic = i__; + } else { + +/* H or H' is applied to C(1:m,i:n) */ + + ni = *n - i__ + 1; + jc = i__; + } + +/* Apply H or H' */ + + dlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * + c_dim1], ldc, &work[1], &ldwork); +/* L10: */ + } + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMQR */ + +} /* dormqr_ */ + +/* Subroutine */ int dormtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * + c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dormql_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *), + dormqr_(char *, char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DORMTR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + nq-1 elementary reflectors, as returned by DSYTRD: + + if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); + + if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A contains elementary reflectors + from DSYTRD; + = 'L': Lower triangle of A contains elementary reflectors + from DSYTRD. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + A (input) DOUBLE PRECISION array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by DSYTRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) DOUBLE PRECISION array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DSYTRD. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
+ + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + left = lsame_(side, "L"); + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! upper && ! lsame_(uplo, "L")) { + *info = -2; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T")) { + *info = -3; + } else if (*m < 0) { + *info = -4; + } else if (*n < 0) { + *info = -5; + } else if (*lda < max(1,nq)) { + *info = -7; + } else if (*ldc < max(1,*m)) { + *info = -10; + } else if (*lwork < max(1,nw) && ! 
lquery) { + *info = -12; + } + + if (*info == 0) { + if (upper) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "DORMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "DORMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } else { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *m - 1; + i__3 = *m - 1; + nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + i__2 = *n - 1; + i__3 = *n - 1; + nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + ftnlen)6, (ftnlen)2); + } + } + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("DORMTR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nq == 1) { + work[1] = 1.; + return 0; + } + + if (left) { + mi = *m - 1; + ni = *n; + } else { + mi = *m; + ni = *n - 1; + } + + if (upper) { + +/* Q was determined by a call to DSYTRD with UPLO = 'U' */ + + i__2 = nq - 1; + dormql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); + } else { + +/* Q was determined by a call to DSYTRD with UPLO = 'L' */ + + if (left) { + i1 = 2; + i2 = 1; + } else { + i1 = 1; + i2 = 2; + } + i__2 = nq - 1; + dormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], 
lda, &tau[1], & + c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + } + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMTR */ + +} /* dormtr_ */ + +/* Subroutine */ int dpotf2_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer j; + static doublereal ajj; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DPOTF2 computes the Cholesky factorization of a real symmetric + positive definite matrix A. + + The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the unblocked version of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored. + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. 
If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DPOTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j * a_dim1 + 1], &c__1, + &a[j * a_dim1 + 1], &c__1); + if (ajj <= 0.) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of row J. 
*/ + + if (j < *n) { + i__2 = j - 1; + i__3 = *n - j; + dgemv_("Transpose", &i__2, &i__3, &c_b2589, &a[(j + 1) * + a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b2453, + &a[j + (j + 1) * a_dim1], lda); + i__2 = *n - j; + d__1 = 1. / ajj; + dscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j + a_dim1], lda, &a[j + + a_dim1], lda); + if (ajj <= 0.) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of column J. */ + + if (j < *n) { + i__2 = *n - j; + i__3 = j - 1; + dgemv_("No transpose", &i__2, &i__3, &c_b2589, &a[j + 1 + + a_dim1], lda, &a[j + a_dim1], lda, &c_b2453, &a[j + 1 + + j * a_dim1], &c__1); + i__2 = *n - j; + d__1 = 1. / ajj; + dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; + +L30: + *info = j; + +L40: + return 0; + +/* End of DPOTF2 */ + +} /* dpotf2_ */ + +/* Subroutine */ int dpotrf_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer j, jb, nb; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, doublereal *, + integer *), dpotf2_(char *, integer *, + 
doublereal *, integer *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DPOTRF computes the Cholesky factorization of a real symmetric + positive definite matrix A. + + The factorization has the form + A = U**T * U, if UPLO = 'U', or + A = L * L**T, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the block version of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + dpotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b2589, &a[j * + a_dim1 + 1], lda, &c_b2453, &a[j + j * a_dim1], lda); + dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block row. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + dgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & + c_b2589, &a[j * a_dim1 + 1], lda, &a[(j + jb) * + a_dim1 + 1], lda, &c_b2453, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & + i__3, &c_b2453, &a[j + j * a_dim1], lda, &a[j + ( + j + jb) * a_dim1], lda); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? 
j >= i__2 : j <= i__2; j += i__1) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + dsyrk_("Lower", "No transpose", &jb, &i__3, &c_b2589, &a[j + + a_dim1], lda, &c_b2453, &a[j + j * a_dim1], lda); + dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & + c_b2589, &a[j + jb + a_dim1], lda, &a[j + a_dim1], + lda, &c_b2453, &a[j + jb + j * a_dim1], lda); + i__3 = *n - j - jb + 1; + dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & + jb, &c_b2453, &a[j + j * a_dim1], lda, &a[j + jb + + j * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of DPOTRF */ + +} /* dpotrf_ */ + +/* Subroutine */ int dpotri_(char *uplo, integer *n, doublereal *a, integer * + lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), dlauum_( + char *, integer *, doublereal *, integer *, integer *), + dtrtri_(char *, char *, integer *, doublereal *, integer *, + integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DPOTRI computes the inverse of a real symmetric positive definite + matrix A using the Cholesky factorization A = U**T*U or A = L*L**T + computed by DPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. 
N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T, as computed by + DPOTRF. + On exit, the upper or lower triangle of the (symmetric) + inverse of A, overwriting the input factor U or L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the (i,i) element of the factor U or L is + zero, and the inverse could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DPOTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + dtrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + +/* Form inv(U)*inv(U)' or inv(L)'*inv(L). 
*/ + + dlauum_(uplo, n, &a[a_offset], lda, info); + + return 0; + +/* End of DPOTRI */ + +} /* dpotri_ */ + +/* Subroutine */ int dpotrs_(char *uplo, integer *n, integer *nrhs, + doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * + info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DPOTRS solves a system of linear equations A*X = B with a symmetric + positive definite matrix A using the Cholesky factorization + A = U**T*U or A = L*L**T computed by DPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**T*U or A = L*L**T, as computed by DPOTRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* + Solve A*X = B where A = U'*U. + + Solve U'*X = B, overwriting B with X. +*/ + + dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b2453, & + a[a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b2453, + &a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. +*/ + + dtrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b2453, + &a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. 
*/ + + dtrsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b2453, & + a[a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of DPOTRS */ + +} /* dpotrs_ */ + +/* Subroutine */ int dstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *lwork, integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j, k, m; + static doublereal p; + static integer ii, end, lgn; + static doublereal eps, tiny; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer lwmin; + extern /* Subroutine */ int dlaed0_(integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *); + static integer start; + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlacpy_(char *, integer *, integer + *, doublereal *, integer *, doublereal *, integer *), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + 
integer *), dlasrt_(char *, integer *, doublereal *, integer *); + static integer liwmin, icompz; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + static doublereal orgnrm; + static logical lquery; + static integer smlsiz, dtrtrw, storez; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DSTEDC computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + The eigenvectors of a full or band real symmetric matrix can also be + found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this + matrix to tridiagonal form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See DLAED3 for details. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'I': Compute eigenvectors of tridiagonal matrix also. + = 'V': Compute eigenvectors of original dense symmetric + matrix also. On entry, Z contains the orthogonal + matrix used to reduce the original matrix to + tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the subdiagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. 
+ + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + On entry, if COMPZ = 'V', then Z contains the orthogonal + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original symmetric matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1. + If eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace/output) DOUBLE PRECISION array, + dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. + If COMPZ = 'V' and N > 1 then LWORK must be at least + ( 1 + 3*N + 2*N*lg N + 3*N**2 ), + where lg( N ) = smallest integer k such + that 2**k >= N. + If COMPZ = 'I' and N > 1 then LWORK must be at least + ( 1 + 4*N + N**2 ). + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. + If COMPZ = 'V' and N > 1 then LIWORK must be at least + ( 6 + 6*N + 5*N*lg N ). + If COMPZ = 'I' and N > 1 then LIWORK must be at least + ( 3 + 5*N ). + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. 
+ < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *liwork == -1; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (*n <= 1 || icompz <= 0) { + liwmin = 1; + lwmin = 1; + } else { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = (*n << 2) + 1 + i__1 * i__1; + liwmin = *n * 5 + 3; + } + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } else if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! 
lquery) { + *info = -10; + } + + if (*info == 0) { + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSTEDC", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + + smlsiz = ilaenv_(&c__9, "DSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + If the following conditional clause is removed, then the routine + will use the Divide and Conquer routine to compute only the + eigenvalues, which requires (3N + 3N**2) real workspace and + (2 + 5N + 2N lg(N)) integer workspace. + Since on many architectures DSTERF is much faster than any other + algorithm for finding eigenvalues only, it is used here + as the default. + + If COMPZ = 'N', use DSTERF to compute the eigenvalues. +*/ + + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + return 0; + } + +/* + If N is smaller than the minimum divide size (SMLSIZ+1), then + solve the problem with another solver. +*/ + + if (*n <= smlsiz) { + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + return 0; + } else if (icompz == 2) { + dsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], + info); + return 0; + } else { + dsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], + info); + return 0; + } + } + +/* + If COMPZ = 'V', the Z matrix must be stored elsewhere for later + use. +*/ + + if (icompz == 1) { + storez = *n * *n + 1; + } else { + storez = 1; + } + + if (icompz == 2) { + dlaset_("Full", n, n, &c_b2467, &c_b2453, &z__[z_offset], ldz); + } + +/* Scale. */ + + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + return 0; + } + + eps = EPSILON; + + start = 1; + +/* while ( START <= N ) */ + +L10: + if (start <= *n) { + +/* + Let END be the position of the next subdiagonal entry such that + E( END ) <= TINY or END = N if no such subdiagonal exists. 
The + matrix identified by the elements between START and END + constitutes an independent sub-problem. +*/ + + end = start; +L20: + if (end < *n) { + tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = + d__[end + 1], abs(d__2))); + if ((d__1 = e[end], abs(d__1)) > tiny) { + ++end; + goto L20; + } + } + +/* (Sub) Problem determined. Compute its size and solve it. */ + + m = end - start + 1; + if (m == 1) { + start = end + 1; + goto L10; + } + if (m > smlsiz) { + *info = smlsiz; + +/* Scale. */ + + orgnrm = dlanst_("M", &m, &d__[start], &e[start]); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2453, &i__1, &c__1, &e[ + start], &i__2, info); + + if (icompz == 1) { + dtrtrw = 1; + } else { + dtrtrw = start; + } + dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[dtrtrw + + start * z_dim1], ldz, &work[1], n, &work[storez], &iwork[ + 1], info); + if (*info != 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m + + 1) + start - 1; + return 0; + } + +/* Scale back. */ + + dlascl_("G", &c__0, &c__0, &c_b2453, &orgnrm, &m, &c__1, &d__[ + start], &m, info); + + } else { + if (icompz == 1) { + +/* + Since QR won't update a Z matrix which is larger than the + length of D, we must solve the sub-problem in a workspace and + then multiply back into Z. 
+*/ + + dsteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ + m * m + 1], info); + dlacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ + storez], n); + dgemm_("N", "N", n, &m, &m, &c_b2453, &work[storez], ldz, & + work[1], &m, &c_b2467, &z__[start * z_dim1 + 1], ldz); + } else if (icompz == 2) { + dsteqr_("I", &m, &d__[start], &e[start], &z__[start + start * + z_dim1], ldz, &work[1], info); + } else { + dsterf_(&m, &d__[start], &e[start], info); + } + if (*info != 0) { + *info = start * (*n + 1) + end; + return 0; + } + } + + start = end + 1; + goto L10; + } + +/* + endwhile + + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. +*/ + + if (m != *n) { + if (icompz == 0) { + +/* Use Quick Sort */ + + dlasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L30: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + + 1], &c__1); + } +/* L40: */ + } + } + } + + work[1] = (doublereal) lwmin; + iwork[1] = liwmin; + + return 0; + +/* End of DSTEDC */ + +} /* dstedc_ */ + +/* Subroutine */ int dsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublereal *z__, integer *ldz, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static doublereal b, c__, f, g; + static integer i__, j, k, l, m; + static doublereal p, r__, s; + static integer l1, ii, mm, lm1, mm1, nm1; + static doublereal rt1, rt2, eps; + static 
integer lsv; + static doublereal tst, eps2; + static integer lend, jtot; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *); + static doublereal anorm; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *), dlaev2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *); + static integer lendm1, lendp1; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlaset_(char *, integer *, integer + *, doublereal *, doublereal *, doublereal *, integer *); + static doublereal safmin; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + static doublereal safmax; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static integer lendsv; + static doublereal ssfmin; + static integer nmaxit, icompz; + static doublereal ssfmax; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + DSTEQR computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the implicit QL or QR method. + The eigenvectors of a full or band symmetric matrix can also be found + if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to + tridiagonal form. 
+ + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors of the original + symmetric matrix. On entry, Z must contain the + orthogonal matrix used to reduce the original matrix + to tridiagonal form. + = 'I': Compute eigenvalues and eigenvectors of the + tridiagonal matrix. Z is initialized to the identity + matrix. + + N (input) INTEGER + The order of the matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) + On entry, if COMPZ = 'V', then Z contains the orthogonal + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original symmetric matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1, and if + eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) + If COMPZ = 'N', then WORK is not referenced. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm has failed to find all the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero; on exit, D + and E contain the elements of a symmetric tridiagonal + matrix which is orthogonally similar to the original + matrix. + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSTEQR", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz == 2) { + z__[z_dim1 + 1] = 1.; + } + return 0; + } + +/* Determine the unit roundoff and over/underflow thresholds. */ + + eps = EPSILON; +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + +/* + Compute the eigenvalues and eigenvectors of the tridiagonal + matrix. +*/ + + if (icompz == 2) { + dlaset_("Full", n, n, &c_b2467, &c_b2453, &z__[z_offset], ldz); + } + + nmaxit = *n * 30; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (d__1 = e[m], abs(d__1)); + if (tst == 0.) 
{ + goto L30; + } + if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m + + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = dlanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm == 0.) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + +/* Choose between QL and QR iteration */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend > l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + + 1], abs(d__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + + m = lend; + +L60: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L80; + } + +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. 
+*/ + + if (m == l + 1) { + if (icompz > 0) { + dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + dlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l + 1] - p) / (e[l] * 2.); + r__ = dlapy2_(&g, &c_b2453); + g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } + +/* L70: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = m - l + 1; + dlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[l] = g; + goto L40; + +/* Eigenvalue found. */ + +L80: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L40; + } + goto L140; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. 
+*/ + +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m - 1], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + - 1], abs(d__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } + + m = lend; + +L110: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L130; + } + +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l - 1) { + if (icompz > 0) { + dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + dlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l - 1] - p) / (e[l - 1] * 2.); + r__ = dlapy2_(&g, &c_b2453); + g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } + +/* L120: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = l - m + 1; + dlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[lm1] = g; + goto L90; + +/* Eigenvalue found. 
*/ + +L130: + d__[l] = p; + + --l; + if (l >= lend) { + goto L90; + } + goto L140; + + } + +/* Undo scaling if necessary */ + +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L150: */ + } + goto L190; + +/* Order eigenvalues and eigenvectors. */ + +L160: + if (icompz == 0) { + +/* Use Quick Sort */ + + dlasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L180: */ + } + } + +L190: + return 0; + +/* End of DSTEQR */ + +} /* dsteqr_ */ + +/* Subroutine */ int dsterf_(integer *n, doublereal *d__, doublereal *e, + integer *info) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static doublereal c__; + static integer i__, l, m; + static doublereal p, r__, s; + static integer l1; + static doublereal bb, rt1, rt2, eps, rte; + static integer lsv; + static doublereal eps2, oldc; + 
static integer lend, jtot; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, doublereal *, doublereal *); + static doublereal gamma, alpha, sigma, anorm; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + static doublereal oldgam, safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal safmax; + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static integer lendsv; + static doublereal ssfmin; + static integer nmaxit; + static doublereal ssfmax; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DSTERF computes all eigenvalues of a symmetric tridiagonal matrix + using the Pal-Walker-Kahan variant of the QL or QR algorithm. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the n diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm failed to find all of the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero. + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n < 0) { + *info = -1; + i__1 = -(*info); + xerbla_("DSTERF", &i__1); + return 0; + } + if (*n <= 1) { + return 0; + } + +/* Determine the unit roundoff for this environment. */ + + eps = EPSILON; +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + +/* Compute the eigenvalues of the tridiagonal matrix. */ + + nmaxit = *n * 30; + sigma = 0.; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + +L10: + if (l1 > *n) { + goto L170; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + i__1 = *n - 1; + for (m = l1; m <= i__1; ++m) { + if ((d__3 = e[m], abs(d__3)) <= sqrt((d__1 = d__[m], abs(d__1))) * + sqrt((d__2 = d__[m + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = dlanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + + i__1 = lend - 1; + for (i__ = l; i__ <= i__1; ++i__) { +/* Computing 2nd power */ + d__1 = e[i__]; + e[i__] = d__1 * d__1; +/* L40: */ + } + +/* Choose between QL and QR 
iteration */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend >= l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L50: + if (l != lend) { + i__1 = lend - 1; + for (m = l; m <= i__1; ++m) { + if ((d__2 = e[m], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m + + 1], abs(d__1))) { + goto L70; + } +/* L60: */ + } + } + m = lend; + +L70: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L90; + } + +/* + If remaining matrix is 2 by 2, use DLAE2 to compute its + eigenvalues. +*/ + + if (m == l + 1) { + rte = sqrt(e[l]); + dlae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L50; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l]); + sigma = (d__[l + 1] - p) / (rte * 2.); + r__ = dlapy2_(&sigma, &c_b2453); + sigma = p - rte / (sigma + d_sign(&r__, &sigma)); + + c__ = 1.; + s = 0.; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l; + for (i__ = m - 1; i__ >= i__1; --i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m - 1) { + e[i__ + 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__ + 1] = oldgam + (alpha - gamma); + if (c__ != 0.) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L80: */ + } + + e[l] = s * p; + d__[l] = sigma + gamma; + goto L50; + +/* Eigenvalue found. */ + +L90: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L50; + } + goto L150; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. 
+*/ + +L100: + i__1 = lend + 1; + for (m = l; m >= i__1; --m) { + if ((d__2 = e[m - 1], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m + - 1], abs(d__1))) { + goto L120; + } +/* L110: */ + } + m = lend; + +L120: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L140; + } + +/* + If remaining matrix is 2 by 2, use DLAE2 to compute its + eigenvalues. +*/ + + if (m == l - 1) { + rte = sqrt(e[l - 1]); + dlae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); + d__[l] = rt1; + d__[l - 1] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L100; + } + goto L150; + } + + if (jtot == nmaxit) { + goto L150; + } + ++jtot; + +/* Form shift. */ + + rte = sqrt(e[l - 1]); + sigma = (d__[l - 1] - p) / (rte * 2.); + r__ = dlapy2_(&sigma, &c_b2453); + sigma = p - rte / (sigma + d_sign(&r__, &sigma)); + + c__ = 1.; + s = 0.; + gamma = d__[m] - sigma; + p = gamma * gamma; + +/* Inner loop */ + + i__1 = l - 1; + for (i__ = m; i__ <= i__1; ++i__) { + bb = e[i__]; + r__ = p + bb; + if (i__ != m) { + e[i__ - 1] = s * r__; + } + oldc = c__; + c__ = p / r__; + s = bb / r__; + oldgam = gamma; + alpha = d__[i__ + 1]; + gamma = c__ * (alpha - sigma) - s * oldgam; + d__[i__] = oldgam + (alpha - gamma); + if (c__ != 0.) { + p = gamma * gamma / c__; + } else { + p = oldc * bb; + } +/* L130: */ + } + + e[l - 1] = s * p; + d__[l] = sigma + gamma; + goto L100; + +/* Eigenvalue found. */ + +L140: + d__[l] = p; + + --l; + if (l >= lend) { + goto L100; + } + goto L150; + + } + +/* Undo scaling if necessary */ + +L150: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + } + if (iscale == 2) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. 
+*/ + + if (jtot < nmaxit) { + goto L10; + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L160: */ + } + goto L180; + +/* Sort eigenvalues in increasing order. */ + +L170: + dlasrt_("I", n, &d__[1], info); + +L180: + return 0; + +/* End of DSTERF */ + +} /* dsterf_ */ + +/* Subroutine */ int dsyevd_(char *jobz, char *uplo, integer *n, doublereal * + a, integer *lda, doublereal *w, doublereal *work, integer *lwork, + integer *iwork, integer *liwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static doublereal eps; + static integer inde; + static doublereal anrm, rmin, rmax; + static integer lopt; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal sigma; + extern logical lsame_(char *, char *); + static integer iinfo, lwmin, liopt; + static logical lower, wantz; + static integer indwk2, llwrk2; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dstedc_(char *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *), dlacpy_( + char *, integer *, integer *, doublereal *, integer *, doublereal + *, integer *); + static doublereal safmin; + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal bignum; + static integer indtau; + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *); + extern doublereal dlansy_(char *, char *, integer *, doublereal *, + integer *, doublereal *); + static integer indwrk, liwmin; + extern /* Subroutine */ int dormtr_(char *, char *, char *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + 
integer *, doublereal *, integer *, integer *), dsytrd_(char *, integer *, doublereal *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + integer *); + static integer llwork; + static doublereal smlnum; + static logical lquery; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DSYEVD computes all eigenvalues and, optionally, eigenvectors of a + real symmetric matrix A. If eigenvectors are desired, it uses a + divide and conquer algorithm. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Because of large use of BLAS of level 3, DSYEVD needs N**2 more + workspace than DSYEVX. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only; + = 'V': Compute eigenvalues and eigenvectors. + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA, N) + On entry, the symmetric matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of A contains the + upper triangular part of the matrix A. If UPLO = 'L', + the leading N-by-N lower triangular part of A contains + the lower triangular part of the matrix A. + On exit, if JOBZ = 'V', then if INFO = 0, A contains the + orthonormal eigenvectors of the matrix A. 
+ If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') + or the upper triangle (if UPLO='U') of A, including the + diagonal, is destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + W (output) DOUBLE PRECISION array, dimension (N) + If INFO = 0, the eigenvalues in ascending order. + + WORK (workspace/output) DOUBLE PRECISION array, + dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If N <= 1, LWORK must be at least 1. + If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. + If JOBZ = 'V' and N > 1, LWORK must be at least + 1 + 6*N + 2*N**2. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If N <= 1, LIWORK must be at least 1. + If JOBZ = 'N' and N > 1, LIWORK must be at least 1. + If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the algorithm failed to converge; i + off-diagonal elements of an intermediate tridiagonal + form did not converge to zero. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. 
+ + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --w; + --work; + --iwork; + + /* Function Body */ + wantz = lsame_(jobz, "V"); + lower = lsame_(uplo, "L"); + lquery = *lwork == -1 || *liwork == -1; + + *info = 0; + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + lopt = lwmin; + liopt = liwmin; + } else { + if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = (*n << 1) + 1; + } + lopt = lwmin; + liopt = liwmin; + } + if (! (wantz || lsame_(jobz, "N"))) { + *info = -1; + } else if (! (lower || lsame_(uplo, "U"))) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } + + if (*info == 0) { + work[1] = (doublereal) lopt; + iwork[1] = liopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSYEVD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + w[1] = a[a_dim1 + 1]; + if (wantz) { + a[a_dim1 + 1] = 1.; + } + return 0; + } + +/* Get machine constants. */ + + safmin = SAFEMINIMUM; + eps = PRECISION; + smlnum = safmin / eps; + bignum = 1. / smlnum; + rmin = sqrt(smlnum); + rmax = sqrt(bignum); + +/* Scale matrix to allowable range, if necessary. */ + + anrm = dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); + iscale = 0; + if (anrm > 0. && anrm < rmin) { + iscale = 1; + sigma = rmin / anrm; + } else if (anrm > rmax) { + iscale = 1; + sigma = rmax / anrm; + } + if (iscale == 1) { + dlascl_(uplo, &c__0, &c__0, &c_b2453, &sigma, n, n, &a[a_offset], lda, + info); + } + +/* Call DSYTRD to reduce symmetric matrix to tridiagonal form. 
*/ + + inde = 1; + indtau = inde + *n; + indwrk = indtau + *n; + llwork = *lwork - indwrk + 1; + indwk2 = indwrk + *n * *n; + llwrk2 = *lwork - indwk2 + 1; + + dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & + work[indwrk], &llwork, &iinfo); + lopt = (integer) ((*n << 1) + work[indwrk]); + +/* + For eigenvalues only, call DSTERF. For eigenvectors, first call + DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the + tridiagonal matrix, then call DORMTR to multiply it by the + Householder transformations stored in A. +*/ + + if (! wantz) { + dsterf_(n, &w[1], &work[inde], info); + } else { + dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & + llwrk2, &iwork[1], liwork, info); + dormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + indwrk], n, &work[indwk2], &llwrk2, &iinfo); + dlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); +/* + Computing MAX + Computing 2nd power +*/ + i__3 = *n; + i__1 = lopt, i__2 = *n * 6 + 1 + (i__3 * i__3 << 1); + lopt = max(i__1,i__2); + } + +/* If matrix was scaled, then rescale eigenvalues appropriately. */ + + if (iscale == 1) { + d__1 = 1. 
/ sigma; + dscal_(n, &d__1, &w[1], &c__1); + } + + work[1] = (doublereal) lopt; + iwork[1] = liopt; + + return 0; + +/* End of DSYEVD */ + +} /* dsyevd_ */ + +/* Subroutine */ int dsytd2_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal taui; + extern /* Subroutine */ int dsyr2_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + integer *); + static doublereal alpha; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlarfg_(integer *, doublereal *, + doublereal *, integer *, doublereal *), xerbla_(char *, integer * + ); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal + form T by an orthogonal similarity transformation: Q' * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. 
If UPLO = 'U', the leading + n-by-n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n-by-n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the orthogonal + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the orthogonal matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) DOUBLE PRECISION array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). + + E (output) DOUBLE PRECISION array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). 
+ + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSYTD2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 0) { + return 0; + } + + if (upper) { + +/* Reduce the upper triangle of A */ + + for (i__ = *n - 1; i__ >= 1; --i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(1:i-1,i+1) +*/ + + dlarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 + + 1], &c__1, &taui); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + + if (taui != 0.) 
{ + +/* Apply H(i) from both sides to A(1:i,1:i) */ + + a[i__ + (i__ + 1) * a_dim1] = 1.; + +/* Compute x := tau * A * v storing x in TAU(1:i) */ + + dsymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b2467, &tau[1], &c__1); + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + alpha = taui * -.5 * ddot_(&i__, &tau[1], &c__1, &a[(i__ + 1) + * a_dim1 + 1], &c__1); + daxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + 1], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + dsyr2_(uplo, &i__, &c_b2589, &a[(i__ + 1) * a_dim1 + 1], & + c__1, &tau[1], &c__1, &a[a_offset], lda); + + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } + d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1]; + tau[i__] = taui; +/* L10: */ + } + d__[1] = a[a_dim1 + 1]; + } else { + +/* Reduce the lower triangle of A */ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* + Generate elementary reflector H(i) = I - tau * v * v' + to annihilate A(i+2:n,i) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &taui); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + + if (taui != 0.) 
{ + +/* Apply H(i) from both sides to A(i+1:n,i+1:n) */ + + a[i__ + 1 + i__ * a_dim1] = 1.; + +/* Compute x := tau * A * v storing y in TAU(i:n-1) */ + + i__2 = *n - i__; + dsymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b2467, & + tau[i__], &c__1); + +/* Compute w := x - 1/2 * tau * (x'*v) * v */ + + i__2 = *n - i__; + alpha = taui * -.5 * ddot_(&i__2, &tau[i__], &c__1, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *n - i__; + daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &c__1); + +/* + Apply the transformation as a rank-2 update: + A := A - v * w' - w * v' +*/ + + i__2 = *n - i__; + dsyr2_(uplo, &i__2, &c_b2589, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * + a_dim1], lda); + + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } + d__[i__] = a[i__ + i__ * a_dim1]; + tau[i__] = taui; +/* L20: */ + } + d__[*n] = a[*n + *n * a_dim1]; + } + + return 0; + +/* End of DSYTD2 */ + +} /* dsytd2_ */ + +/* Subroutine */ int dsytrd_(char *uplo, integer *n, doublereal *a, integer * + lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, nb, kk, nx, iws; + extern logical lsame_(char *, char *); + static integer nbmin, iinfo; + static logical upper; + extern /* Subroutine */ int dsytd2_(char *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, integer *), dsyr2k_(char *, char *, integer *, integer *, doublereal + *, doublereal *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *), dlatrd_(char *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *), xerbla_(char *, + integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, 
ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DSYTRD reduces a real symmetric matrix A to real symmetric + tridiagonal form T by an orthogonal similarity transformation: + Q**T * A * Q = T. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + On exit, if UPLO = 'U', the diagonal and first superdiagonal + of A are overwritten by the corresponding elements of the + tridiagonal matrix T, and the elements above the first + superdiagonal, with the array TAU, represent the orthogonal + matrix Q as a product of elementary reflectors; if UPLO + = 'L', the diagonal and first subdiagonal of A are over- + written by the corresponding elements of the tridiagonal + matrix T, and the elements below the first subdiagonal, with + the array TAU, represent the orthogonal matrix Q as a product + of elementary reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + D (output) DOUBLE PRECISION array, dimension (N) + The diagonal elements of the tridiagonal matrix T: + D(i) = A(i,i). 
+ + E (output) DOUBLE PRECISION array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix T: + E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. + + TAU (output) DOUBLE PRECISION array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + If UPLO = 'U', the matrix Q is represented as a product of elementary + reflectors + + Q = H(n-1) . . . H(2) H(1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in + A(1:i-1,i+1), and tau in TAU(i). + + If UPLO = 'L', the matrix Q is represented as a product of elementary + reflectors + + Q = H(1) H(2) . . . H(n-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), + and tau in TAU(i). + + The contents of A on exit are illustrated by the following examples + with n = 5: + + if UPLO = 'U': if UPLO = 'L': + + ( d e v2 v3 v4 ) ( d ) + ( d e v3 v4 ) ( e d ) + ( d e v4 ) ( v1 e d ) + ( d e ) ( v1 v2 e d ) + ( d ) ( v1 v2 v3 e d ) + + where d and e denote diagonal and off-diagonal elements of T, and vi + denotes an element of the vector defining H(i). 
+ + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tau; + --work; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + lquery = *lwork == -1; + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*lwork < 1 && ! lquery) { + *info = -9; + } + + if (*info == 0) { + +/* Determine the block size. */ + + nb = ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + (ftnlen)1); + lwkopt = *n * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DSYTRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + + nx = *n; + iws = 1; + if (nb > 1 && nb < *n) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < *n) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code by setting NX = N. + + Computing MAX +*/ + i__1 = *lwork / ldwork; + nb = max(i__1,1); + nbmin = ilaenv_(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + if (nb < nbmin) { + nx = *n; + } + } + } else { + nx = *n; + } + } else { + nb = 1; + } + + if (upper) { + +/* + Reduce the upper triangle of A. + Columns 1:kk are handled by the unblocked method. 
+*/ + + kk = *n - (*n - nx + nb - 1) / nb * nb; + i__1 = kk + 1; + i__2 = -nb; + for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = i__ + nb - 1; + dlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + work[1], &ldwork); + +/* + Update the unreduced submatrix A(1:i-1,1:i-1), using an + update of the form: A := A - V*W' - W*V' +*/ + + i__3 = i__ - 1; + dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b2589, &a[i__ * + a_dim1 + 1], lda, &work[1], &ldwork, &c_b2453, &a[ + a_offset], lda); + +/* + Copy superdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j - 1 + j * a_dim1] = e[j - 1]; + d__[j] = a[j + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + +/* Use unblocked code to reduce the last or only block */ + + dsytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + } else { + +/* Reduce the lower triangle of A */ + + i__2 = *n - nx; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { + +/* + Reduce columns i:i+nb-1 to tridiagonal form and form the + matrix W which is needed to update the unreduced part of + the matrix +*/ + + i__3 = *n - i__ + 1; + dlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + tau[i__], &work[1], &ldwork); + +/* + Update the unreduced submatrix A(i+ib:n,i+ib:n), using + an update of the form: A := A - V*W' - W*V' +*/ + + i__3 = *n - i__ - nb + 1; + dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b2589, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b2453, &a[ + i__ + nb + (i__ + nb) * a_dim1], lda); + +/* + Copy subdiagonal elements back into A, and diagonal + elements into D +*/ + + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + 1 + j * a_dim1] = e[j]; + d__[j] = a[j + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + +/* Use unblocked code to reduce the last or only block */ + + i__1 = *n - i__ + 1; + dsytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + &tau[i__], &iinfo); + } + + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DSYTRD */ + +} /* dsytrd_ */ + +/* Subroutine */ int dtrevc_(char *side, char *howmny, logical *select, + integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * + ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3; + doublereal d__1, d__2, d__3, d__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j, k; + static doublereal x[4] /* was [2][2] */; + static integer j1, j2, n2, ii, ki, ip, is; + static doublereal wi, wr, rec, ulp, beta, emax; + static logical pair; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, + integer *); + static logical allv; + static integer ierr; + static doublereal unfl, ovfl, smin; + static logical over; + 
static doublereal vmax; + static integer jnxt; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + static doublereal scale; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dgemv_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *); + static doublereal remax; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static logical leftv, bothv; + extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *); + static doublereal vcrit; + static logical somev; + static doublereal xnorm; + extern /* Subroutine */ int dlaln2_(logical *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, doublereal * + , doublereal *, integer *, doublereal *, doublereal *, integer *), + dlabad_(doublereal *, doublereal *); + + extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static doublereal bignum; + static logical rightv; + static doublereal smlnum; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + DTREVC computes some or all of the right and/or left eigenvectors of + a real upper quasi-triangular matrix T. + + The right eigenvector x and the left eigenvector y of T corresponding + to an eigenvalue w are defined by: + + T*x = w*x, y'*T = w*y' + + where y' denotes the conjugate transpose of the vector y. + + If all eigenvectors are requested, the routine may either return the + matrices X and/or Y of right or left eigenvectors of T, or the + products Q*X and/or Q*Y, where Q is an input orthogonal + matrix. 
If T was obtained from the real-Schur factorization of an + original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of + right or left eigenvectors of A. + + T must be in Schur canonical form (as returned by DHSEQR), that is, + block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + 2-by-2 diagonal block has its diagonal elements equal and its + off-diagonal elements of opposite sign. Corresponding to each 2-by-2 + diagonal block is a complex conjugate pair of eigenvalues and + eigenvectors; only one eigenvector of the pair is computed, namely + the one corresponding to the eigenvalue with positive imaginary part. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'R': compute right eigenvectors only; + = 'L': compute left eigenvectors only; + = 'B': compute both right and left eigenvectors. + + HOWMNY (input) CHARACTER*1 + = 'A': compute all right and/or left eigenvectors; + = 'B': compute all right and/or left eigenvectors, + and backtransform them using the input matrices + supplied in VR and/or VL; + = 'S': compute selected right and/or left eigenvectors, + specified by the logical array SELECT. + + SELECT (input/output) LOGICAL array, dimension (N) + If HOWMNY = 'S', SELECT specifies the eigenvectors to be + computed. + If HOWMNY = 'A' or 'B', SELECT is not referenced. + To select the real eigenvector corresponding to a real + eigenvalue w(j), SELECT(j) must be set to .TRUE.. To select + the complex eigenvector corresponding to a complex conjugate + pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be + set to .TRUE.; then on exit SELECT(j) is .TRUE. and + SELECT(j+1) is .FALSE.. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input) DOUBLE PRECISION array, dimension (LDT,N) + The upper quasi-triangular matrix T in Schur canonical form. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). 
+ + VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) + On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must + contain an N-by-N matrix Q (usually the orthogonal matrix Q + of Schur vectors returned by DHSEQR). + On exit, if SIDE = 'L' or 'B', VL contains: + if HOWMNY = 'A', the matrix Y of left eigenvectors of T; + VL has the same quasi-lower triangular form + as T'. If T(i,i) is a real eigenvalue, then + the i-th column VL(i) of VL is its + corresponding eigenvector. If T(i:i+1,i:i+1) + is a 2-by-2 block whose eigenvalues are + complex-conjugate eigenvalues of T, then + VL(i)+sqrt(-1)*VL(i+1) is the complex + eigenvector corresponding to the eigenvalue + with positive real part. + if HOWMNY = 'B', the matrix Q*Y; + if HOWMNY = 'S', the left eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VL, in the same order as their + eigenvalues. + A complex eigenvector corresponding to a complex eigenvalue + is stored in two consecutive columns, the first holding the + real part, and the second the imaginary part. + If SIDE = 'R', VL is not referenced. + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= max(1,N) if + SIDE = 'L' or 'B'; LDVL >= 1 otherwise. + + VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) + On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must + contain an N-by-N matrix Q (usually the orthogonal matrix Q + of Schur vectors returned by DHSEQR). + On exit, if SIDE = 'R' or 'B', VR contains: + if HOWMNY = 'A', the matrix X of right eigenvectors of T; + VR has the same quasi-upper triangular form + as T. If T(i,i) is a real eigenvalue, then + the i-th column VR(i) of VR is its + corresponding eigenvector. If T(i:i+1,i:i+1) + is a 2-by-2 block whose eigenvalues are + complex-conjugate eigenvalues of T, then + VR(i)+sqrt(-1)*VR(i+1) is the complex + eigenvector corresponding to the eigenvalue + with positive real part. 
+ if HOWMNY = 'B', the matrix Q*X; + if HOWMNY = 'S', the right eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VR, in the same order as their + eigenvalues. + A complex eigenvector corresponding to a complex eigenvalue + is stored in two consecutive columns, the first holding the + real part and the second the imaginary part. + If SIDE = 'L', VR is not referenced. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= max(1,N) if + SIDE = 'R' or 'B'; LDVR >= 1 otherwise. + + MM (input) INTEGER + The number of columns in the arrays VL and/or VR. MM >= M. + + M (output) INTEGER + The number of columns in the arrays VL and/or VR actually + used to store the eigenvectors. + If HOWMNY = 'A' or 'B', M is set to N. + Each selected real eigenvector occupies one column and each + selected complex eigenvector occupies two columns. + + WORK (workspace) DOUBLE PRECISION array, dimension (3*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The algorithm used in this program is basically backward (forward) + substitution, with scaling to make the the code robust against + possible overflow. + + Each eigenvector is normalized so that the element of largest + magnitude has magnitude 1; here the magnitude of a complex number + (x,y) is taken to be |x| + |y|. 
+ + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + bothv = lsame_(side, "B"); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; + + allv = lsame_(howmny, "A"); + over = lsame_(howmny, "B"); + somev = lsame_(howmny, "S"); + + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! allv && ! over && ! somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else { + +/* + Set M to the number of columns required to store the selected + eigenvectors, standardize the array SELECT if necessary, and + test MM. +*/ + + if (somev) { + *m = 0; + pair = FALSE_; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (pair) { + pair = FALSE_; + select[j] = FALSE_; + } else { + if (j < *n) { + if (t[j + 1 + j * t_dim1] == 0.) { + if (select[j]) { + ++(*m); + } + } else { + pair = TRUE_; + if (select[j] || select[j + 1]) { + select[j] = TRUE_; + *m += 2; + } + } + } else { + if (select[*n]) { + ++(*m); + } + } + } +/* L10: */ + } + } else { + *m = *n; + } + + if (*mm < *m) { + *info = -11; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DTREVC", &i__1); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + +/* Set the constants to control overflow. */ + + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; + smlnum = unfl * (*n / ulp); + bignum = (1. 
- ulp) / smlnum; + +/* + Compute 1-norm of each column of strictly upper triangular + part of T to control overflow in triangular solver. +*/ + + work[1] = 0.; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + work[j] = 0.; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + work[j] += (d__1 = t[i__ + j * t_dim1], abs(d__1)); +/* L20: */ + } +/* L30: */ + } + +/* + Index IP is used to specify the real or complex eigenvalue: + IP = 0, real eigenvalue, + 1, first of conjugate complex pair: (wr,wi) + -1, second of conjugate complex pair: (wr,wi) +*/ + + n2 = *n << 1; + + if (rightv) { + +/* Compute right eigenvectors. */ + + ip = 0; + is = *m; + for (ki = *n; ki >= 1; --ki) { + + if (ip == 1) { + goto L130; + } + if (ki == 1) { + goto L40; + } + if (t[ki + (ki - 1) * t_dim1] == 0.) { + goto L40; + } + ip = -1; + +L40: + if (somev) { + if (ip == 0) { + if (! select[ki]) { + goto L130; + } + } else { + if (! select[ki - 1]) { + goto L130; + } + } + } + +/* Compute the KI-th eigenvalue (WR,WI). */ + + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki - 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki - 1 + ki * t_dim1], abs(d__2))); + } +/* Computing MAX */ + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1,smlnum); + + if (ip == 0) { + +/* Real right eigenvector */ + + work[ki + *n] = 1.; + +/* Form right-hand side */ + + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + work[k + *n] = -t[k + ki * t_dim1]; +/* L50: */ + } + +/* + Solve the upper quasi-triangular system: + (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. +*/ + + jnxt = ki - 1; + for (j = ki - 1; j >= 1; --j) { + if (j > jnxt) { + goto L60; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) 
{ + j1 = j - 1; + jnxt = j - 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b2453, &t[j + + j * t_dim1], ldt, &c_b2453, &c_b2453, &work[ + j + *n], n, &wr, &c_b2467, x, &c__2, &scale, & + xnorm, &ierr); + +/* + Scale X(1,1) to avoid overflow when updating + the right-hand side. +*/ + + if (xnorm > 1.) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + } + work[j + *n] = x[0]; + +/* Update right-hand side */ + + i__1 = j - 1; + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + + } else { + +/* 2-by-2 diagonal block */ + + dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b2453, &t[j + - 1 + (j - 1) * t_dim1], ldt, &c_b2453, & + c_b2453, &work[j - 1 + *n], n, &wr, &c_b2467, + x, &c__2, &scale, &xnorm, &ierr); + +/* + Scale X(1,1) and X(2,1) to avoid overflow when + updating the right-hand side. +*/ + + if (xnorm > 1.) { +/* Computing MAX */ + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1,d__2); + if (beta > bignum / xnorm) { + x[0] /= xnorm; + x[1] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + } + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; + +/* Update right-hand side */ + + i__1 = j - 2; + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + d__1 = -x[1]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + } +L60: + ; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! over) { + dcopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); + + ii = idamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + remax = 1. 
/ (d__1 = vr[ii + is * vr_dim1], abs(d__1)); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + is * vr_dim1] = 0.; +/* L70: */ + } + } else { + if (ki > 1) { + i__1 = ki - 1; + dgemv_("N", n, &i__1, &c_b2453, &vr[vr_offset], ldvr, + &work[*n + 1], &c__1, &work[ki + *n], &vr[ki * + vr_dim1 + 1], &c__1); + } + + ii = idamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + remax = 1. / (d__1 = vr[ii + ki * vr_dim1], abs(d__1)); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + + } else { + +/* + Complex right eigenvector. + + Initial solve + [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. + [ (T(KI,KI-1) T(KI,KI) ) ] +*/ + + if ((d__1 = t[ki - 1 + ki * t_dim1], abs(d__1)) >= (d__2 = t[ + ki + (ki - 1) * t_dim1], abs(d__2))) { + work[ki - 1 + *n] = 1.; + work[ki + n2] = wi / t[ki - 1 + ki * t_dim1]; + } else { + work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1]; + work[ki + n2] = 1.; + } + work[ki + *n] = 0.; + work[ki - 1 + n2] = 0.; + +/* Form right-hand side */ + + i__1 = ki - 2; + for (k = 1; k <= i__1; ++k) { + work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) * + t_dim1]; + work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1]; +/* L80: */ + } + +/* + Solve upper quasi-triangular system: + (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) +*/ + + jnxt = ki - 2; + for (j = ki - 2; j >= 1; --j) { + if (j > jnxt) { + goto L90; + } + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.) { + j1 = j - 1; + jnxt = j - 2; + } + } + + if (j1 == j2) { + +/* 1-by-1 diagonal block */ + + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b2453, &t[j + + j * t_dim1], ldt, &c_b2453, &c_b2453, &work[ + j + *n], n, &wr, &wi, x, &c__2, &scale, & + xnorm, &ierr); + +/* + Scale X(1,1) and X(1,2) to avoid overflow when + updating the right-hand side. +*/ + + if (xnorm > 1.) 
{ + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + x[2] /= xnorm; + scale /= xnorm; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + dscal_(&ki, &scale, &work[n2 + 1], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; + +/* Update the right-hand side */ + + i__1 = j - 1; + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 1; + d__1 = -x[2]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); + + } else { + +/* 2-by-2 diagonal block */ + + dlaln2_(&c_false, &c__2, &c__2, &smin, &c_b2453, &t[j + - 1 + (j - 1) * t_dim1], ldt, &c_b2453, & + c_b2453, &work[j - 1 + *n], n, &wr, &wi, x, & + c__2, &scale, &xnorm, &ierr); + +/* + Scale X to avoid overflow when updating + the right-hand side. +*/ + + if (xnorm > 1.) { +/* Computing MAX */ + d__1 = work[j - 1], d__2 = work[j]; + beta = max(d__1,d__2); + if (beta > bignum / xnorm) { + rec = 1. / xnorm; + x[0] *= rec; + x[2] *= rec; + x[1] *= rec; + x[3] *= rec; + scale *= rec; + } + } + +/* Scale if necessary */ + + if (scale != 1.) { + dscal_(&ki, &scale, &work[*n + 1], &c__1); + dscal_(&ki, &scale, &work[n2 + 1], &c__1); + } + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; + work[j - 1 + n2] = x[2]; + work[j + n2] = x[3]; + +/* Update the right-hand side */ + + i__1 = j - 2; + d__1 = -x[0]; + daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + d__1 = -x[1]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 2; + d__1 = -x[2]; + daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[n2 + 1], &c__1); + i__1 = j - 2; + d__1 = -x[3]; + daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); + } +L90: + ; + } + +/* Copy the vector x or Q*x to VR and normalize. */ + + if (! 
over) { + dcopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 + + 1], &c__1); + dcopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); + + emax = 0.; + i__1 = ki; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vr[k + (is - 1) * vr_dim1] + , abs(d__1)) + (d__2 = vr[k + is * vr_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L100: */ + } + + remax = 1. / emax; + dscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); + dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + (is - 1) * vr_dim1] = 0.; + vr[k + is * vr_dim1] = 0.; +/* L110: */ + } + + } else { + + if (ki > 2) { + i__1 = ki - 2; + dgemv_("N", n, &i__1, &c_b2453, &vr[vr_offset], ldvr, + &work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[ + (ki - 1) * vr_dim1 + 1], &c__1); + i__1 = ki - 2; + dgemv_("N", n, &i__1, &c_b2453, &vr[vr_offset], ldvr, + &work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * + vr_dim1 + 1], &c__1); + } else { + dscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 + + 1], &c__1); + dscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & + c__1); + } + + emax = 0.; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vr[k + (ki - 1) * vr_dim1] + , abs(d__1)) + (d__2 = vr[k + ki * vr_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L120: */ + } + remax = 1. / emax; + dscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); + dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } + } + + --is; + if (ip != 0) { + --is; + } +L130: + if (ip == 1) { + ip = 0; + } + if (ip == -1) { + ip = 1; + } +/* L140: */ + } + } + + if (leftv) { + +/* Compute left eigenvectors. */ + + ip = 0; + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { + + if (ip == -1) { + goto L250; + } + if (ki == *n) { + goto L150; + } + if (t[ki + 1 + ki * t_dim1] == 0.) { + goto L150; + } + ip = 1; + +L150: + if (somev) { + if (! 
select[ki]) { + goto L250; + } + } + +/* Compute the KI-th eigenvalue (WR,WI). */ + + wr = t[ki + ki * t_dim1]; + wi = 0.; + if (ip != 0) { + wi = sqrt((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1))) * + sqrt((d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))); + } +/* Computing MAX */ + d__1 = ulp * (abs(wr) + abs(wi)); + smin = max(d__1,smlnum); + + if (ip == 0) { + +/* Real left eigenvector. */ + + work[ki + *n] = 1.; + +/* Form right-hand side */ + + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + work[k + *n] = -t[ki + k * t_dim1]; +/* L160: */ + } + +/* + Solve the quasi-triangular system: + (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK +*/ + + vmax = 1.; + vcrit = bignum; + + jnxt = ki + 1; + i__2 = *n; + for (j = ki + 1; j <= i__2; ++j) { + if (j < jnxt) { + goto L170; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + + if (j1 == j2) { + +/* + 1-by-1 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side. +*/ + + if (work[j] > vcrit) { + rec = 1. / vmax; + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 1; + work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); + +/* Solve (T(J,J)-WR)'*X = WORK */ + + dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b2453, &t[j + + j * t_dim1], ldt, &c_b2453, &c_b2453, &work[ + j + *n], n, &wr, &c_b2467, x, &c__2, &scale, & + xnorm, &ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; +/* Computing MAX */ + d__2 = (d__1 = work[j + *n], abs(d__1)); + vmax = max(d__2,vmax); + vcrit = bignum / vmax; + + } else { + +/* + 2-by-2 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side. 
+ + Computing MAX +*/ + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1,d__2); + if (beta > vcrit) { + rec = 1. / vmax; + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 1; + work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); + + i__3 = j - ki - 1; + work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 1 + (j + 1) * + t_dim1], &c__1, &work[ki + 1 + *n], &c__1); + +/* + Solve + [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) + [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) +*/ + + dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b2453, &t[j + + j * t_dim1], ldt, &c_b2453, &c_b2453, &work[ + j + *n], n, &wr, &c_b2467, x, &c__2, &scale, & + xnorm, &ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; + work[j + 1 + *n] = x[1]; + +/* Computing MAX */ + d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 + = work[j + 1 + *n], abs(d__2)), d__3 = max( + d__3,d__4); + vmax = max(d__3,vmax); + vcrit = bignum / vmax; + + } +L170: + ; + } + +/* Copy the vector x or Q*x to VL and normalize. */ + + if (! over) { + i__2 = *n - ki + 1; + dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); + + i__2 = *n - ki + 1; + ii = idamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - + 1; + remax = 1. / (d__1 = vl[ii + is * vl_dim1], abs(d__1)); + i__2 = *n - ki + 1; + dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.; +/* L180: */ + } + + } else { + + if (ki < *n) { + i__2 = *n - ki; + dgemv_("N", n, &i__2, &c_b2453, &vl[(ki + 1) * + vl_dim1 + 1], ldvl, &work[ki + 1 + *n], &c__1, + &work[ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + } + + ii = idamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + remax = 1. 
/ (d__1 = vl[ii + ki * vl_dim1], abs(d__1)); + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + + } + + } else { + +/* + Complex left eigenvector. + + Initial solve: + ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. + ((T(KI+1,KI) T(KI+1,KI+1)) ) +*/ + + if ((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1)) >= (d__2 = + t[ki + 1 + ki * t_dim1], abs(d__2))) { + work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1]; + work[ki + 1 + n2] = 1.; + } else { + work[ki + *n] = 1.; + work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1]; + } + work[ki + 1 + *n] = 0.; + work[ki + n2] = 0.; + +/* Form right-hand side */ + + i__2 = *n; + for (k = ki + 2; k <= i__2; ++k) { + work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1]; + work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1] + ; +/* L190: */ + } + +/* + Solve complex quasi-triangular system: + ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 +*/ + + vmax = 1.; + vcrit = bignum; + + jnxt = ki + 2; + i__2 = *n; + for (j = ki + 2; j <= i__2; ++j) { + if (j < jnxt) { + goto L200; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.) { + j2 = j + 1; + jnxt = j + 2; + } + } + + if (j1 == j2) { + +/* + 1-by-1 diagonal block + + Scale if necessary to avoid overflow when + forming the right-hand side elements. +*/ + + if (work[j] > vcrit) { + rec = 1. 
/ vmax; + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 2; + work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + i__3 = j - ki - 2; + work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); + +/* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */ + + d__1 = -wi; + dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b2453, &t[j + + j * t_dim1], ldt, &c_b2453, &c_b2453, &work[ + j + *n], n, &wr, &d__1, x, &c__2, &scale, & + xnorm, &ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; +/* Computing MAX */ + d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 + = work[j + n2], abs(d__2)), d__3 = max(d__3, + d__4); + vmax = max(d__3,vmax); + vcrit = bignum / vmax; + + } else { + +/* + 2-by-2 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand side elements. + + Computing MAX +*/ + d__1 = work[j], d__2 = work[j + 1]; + beta = max(d__1,d__2); + if (beta > vcrit) { + rec = 1. 
/ vmax; + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.; + vcrit = bignum; + } + + i__3 = j - ki - 2; + work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + n2] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + n2], &c__1); + +/* + Solve 2-by-2 complex linear equation + ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B + ([T(j+1,j) T(j+1,j+1)] ) +*/ + + d__1 = -wi; + dlaln2_(&c_true, &c__2, &c__2, &smin, &c_b2453, &t[j + + j * t_dim1], ldt, &c_b2453, &c_b2453, &work[ + j + *n], n, &wr, &d__1, x, &c__2, &scale, & + xnorm, &ierr); + +/* Scale if necessary */ + + if (scale != 1.) { + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + dscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; + work[j + 1 + *n] = x[1]; + work[j + 1 + n2] = x[3]; +/* Computing MAX */ + d__1 = abs(x[0]), d__2 = abs(x[2]), d__1 = max(d__1, + d__2), d__2 = abs(x[1]), d__1 = max(d__1,d__2) + , d__2 = abs(x[3]), d__1 = max(d__1,d__2); + vmax = max(d__1,vmax); + vcrit = bignum / vmax; + + } +L200: + ; + } + +/* + Copy the vector x or Q*x to VL and normalize. + + L210: +*/ + if (! 
over) { + i__2 = *n - ki + 1; + dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); + i__2 = *n - ki + 1; + dcopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * + vl_dim1], &c__1); + + emax = 0.; + i__2 = *n; + for (k = ki; k <= i__2; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vl[k + is * vl_dim1], abs( + d__1)) + (d__2 = vl[k + (is + 1) * vl_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L220: */ + } + remax = 1. / emax; + i__2 = *n - ki + 1; + dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + i__2 = *n - ki + 1; + dscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) + ; + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.; + vl[k + (is + 1) * vl_dim1] = 0.; +/* L230: */ + } + } else { + if (ki < *n - 1) { + i__2 = *n - ki - 1; + dgemv_("N", n, &i__2, &c_b2453, &vl[(ki + 2) * + vl_dim1 + 1], ldvl, &work[ki + 2 + *n], &c__1, + &work[ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + i__2 = *n - ki - 1; + dgemv_("N", n, &i__2, &c_b2453, &vl[(ki + 2) * + vl_dim1 + 1], ldvl, &work[ki + 2 + n2], &c__1, + &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + + 1], &c__1); + } else { + dscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & + c__1); + dscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + + 1], &c__1); + } + + emax = 0.; + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing MAX */ + d__3 = emax, d__4 = (d__1 = vl[k + ki * vl_dim1], abs( + d__1)) + (d__2 = vl[k + (ki + 1) * vl_dim1], + abs(d__2)); + emax = max(d__3,d__4); +/* L240: */ + } + remax = 1. 
/ emax; + dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + dscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); + + } + + } + + ++is; + if (ip != 0) { + ++is; + } +L250: + if (ip == -1) { + ip = 0; + } + if (ip == 1) { + ip = -1; + } + +/* L260: */ + } + + } + + return 0; + +/* End of DTREVC */ + +} /* dtrevc_ */ + +/* Subroutine */ int dtrti2_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer j; + static doublereal ajj; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); + static logical nounit; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + DTRTI2 computes the inverse of a real upper or lower triangular + matrix. + + This is the Level 2 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading n by n upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. 
If UPLO = 'L', the + leading n by n lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DTRTI2", &i__1); + return 0; + } + + if (upper) { + +/* Compute inverse of upper triangular matrix. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.; + } + +/* Compute elements 1:j-1 of j-th column. */ + + i__2 = j - 1; + dtrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + dscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { + +/* Compute inverse of lower triangular matrix. */ + + for (j = *n; j >= 1; --j) { + if (nounit) { + a[j + j * a_dim1] = 1. / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. 
*/ + + i__1 = *n - j; + dtrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); + i__1 = *n - j; + dscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + + return 0; + +/* End of DTRTI2 */ + +} /* dtrti2_ */ + +/* Subroutine */ int dtrtri_(char *uplo, char *diag, integer *n, doublereal * + a, integer *lda, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer j, jb, nb, nn; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), dtrsm_( + char *, char *, char *, char *, integer *, integer *, doublereal * + , doublereal *, integer *, doublereal *, integer *); + static logical upper; + extern /* Subroutine */ int dtrti2_(char *, char *, integer *, doublereal + *, integer *, integer *), xerbla_(char *, integer + *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical nounit; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + DTRTRI computes the inverse of a real upper or lower triangular + matrix A. + + This is the Level 3 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': A is upper triangular; + = 'L': A is lower triangular. + + DIAG (input) CHARACTER*1 + = 'N': A is non-unit triangular; + = 'U': A is unit triangular. + + N (input) INTEGER + The order of the matrix A. N >= 0. 
+ + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, A(i,i) is exactly zero. The triangular + matrix is singular and its inverse can not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DTRTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity if non-unit. */ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (a[*info + *info * a_dim1] == 0.) { + return 0; + } +/* L10: */ + } + *info = 0; + } + +/* + Determine the block size for this environment. 
+ + Writing concatenation +*/ + i__2[0] = 1, a__1[0] = uplo; + i__2[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "DTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + dtrti2_(uplo, diag, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute inverse of upper triangular matrix */ + + i__1 = *n; + i__3 = nb; + for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); + +/* Compute rows 1:j-1 of current block column */ + + i__4 = j - 1; + dtrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & + c_b2453, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + i__4 = j - 1; + dtrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & + c_b2589, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda); + +/* Compute inverse of current diagonal block */ + + dtrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L20: */ + } + } else { + +/* Compute inverse of lower triangular matrix */ + + nn = (*n - 1) / nb * nb + 1; + i__3 = -nb; + for (j = nn; i__3 < 0 ? 
j >= 1 : j <= 1; j += i__3) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { + +/* Compute rows j+jb:n of current block column */ + + i__1 = *n - j - jb + 1; + dtrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, + &c_b2453, &a[j + jb + (j + jb) * a_dim1], lda, &a[ + j + jb + j * a_dim1], lda); + i__1 = *n - j - jb + 1; + dtrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, + &c_b2589, &a[j + j * a_dim1], lda, &a[j + jb + j + * a_dim1], lda); + } + +/* Compute inverse of current diagonal block */ + + dtrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L30: */ + } + } + } + + return 0; + +/* End of DTRTRI */ + +} /* dtrtri_ */ + +integer ieeeck_(integer *ispec, real *zero, real *one) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + static real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, + newzro; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1998 + + + Purpose + ======= + + IEEECK is called from the ILAENV to verify that Infinity and + possibly NaN arithmetic is safe (i.e. will not trap). + + Arguments + ========= + + ISPEC (input) INTEGER + Specifies whether to test just for inifinity arithmetic + or whether to test for infinity and NaN arithmetic. + = 0: Verify infinity arithmetic only. + = 1: Verify infinity and NaN arithmetic. + + ZERO (input) REAL + Must contain the value 0.0 + This is passed to prevent the compiler from optimizing + away this code. + + ONE (input) REAL + Must contain the value 1.0 + This is passed to prevent the compiler from optimizing + away this code. 
+ + RETURN VALUE: INTEGER + = 0: Arithmetic failed to produce the correct answers + = 1: Arithmetic produced the correct answers +*/ + + ret_val = 1; + + posinf = *one / *zero; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf = -(*one) / *zero; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + negzro = *one / (neginf + *one); + if (negzro != *zero) { + ret_val = 0; + return ret_val; + } + + neginf = *one / negzro; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + newzro = negzro + *zero; + if (newzro != *zero) { + ret_val = 0; + return ret_val; + } + + posinf = *one / newzro; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf *= posinf; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + posinf *= posinf; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + +/* Return if we were only asked to check infinity arithmetic */ + + if (*ispec == 0) { + return ret_val; + } + + nan1 = posinf + neginf; + + nan2 = posinf / neginf; + + nan3 = posinf / posinf; + + nan4 = posinf * *zero; + + nan5 = neginf * negzro; + + nan6 = nan5 * 0.f; + + if (nan1 == nan1) { + ret_val = 0; + return ret_val; + } + + if (nan2 == nan2) { + ret_val = 0; + return ret_val; + } + + if (nan3 == nan3) { + ret_val = 0; + return ret_val; + } + + if (nan4 == nan4) { + ret_val = 0; + return ret_val; + } + + if (nan5 == nan5) { + ret_val = 0; + return ret_val; + } + + if (nan6 == nan6) { + ret_val = 0; + return ret_val; + } + + return ret_val; +} /* ieeeck_ */ + +integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen + opts_len) +{ + /* System generated locals */ + integer ret_val; + + /* Builtin functions */ + /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); + integer s_cmp(char *, char *, ftnlen, ftnlen); + + /* Local variables */ + static integer i__; + static char c1[1], c2[2], c3[3], c4[2]; + static integer 
ic, nb, iz, nx; + static logical cname, sname; + static integer nbmin; + extern integer ieeeck_(integer *, real *, real *); + static char subnam[6]; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ILAENV is called from the LAPACK routines to choose problem-dependent + parameters for the local environment. See ISPEC for a description of + the parameters. + + This version provides a set of parameters which should give good, + but not optimal, performance on many of the currently available + computers. Users are encouraged to modify this subroutine to set + the tuning parameters for their particular machine using the option + and problem size information in the arguments. + + This routine will not function correctly if it is converted to all + lower case. Converting it to all upper case is allowed. + + Arguments + ========= + + ISPEC (input) INTEGER + Specifies the parameter to be returned as the value of + ILAENV. + = 1: the optimal blocksize; if this value is 1, an unblocked + algorithm will give the best performance. + = 2: the minimum block size for which the block routine + should be used; if the usable block size is less than + this value, an unblocked routine should be used. + = 3: the crossover point (in a block routine, for N less + than this value, an unblocked routine should be used) + = 4: the number of shifts, used in the nonsymmetric + eigenvalue routines + = 5: the minimum column dimension for blocking to be used; + rectangular blocks must have dimension at least k by m, + where k is given by ILAENV(2,...) and m by ILAENV(5,...) + = 6: the crossover point for the SVD (when reducing an m by n + matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds + this value, a QR factorization is used first to reduce + the matrix to a triangular form.) 
+ = 7: the number of processors + = 8: the crossover point for the multishift QR and QZ methods + for nonsymmetric eigenvalue problems. + = 9: maximum size of the subproblems at the bottom of the + computation tree in the divide-and-conquer algorithm + (used by xGELSD and xGESDD) + =10: ieee NaN arithmetic can be trusted not to trap + =11: infinity arithmetic can be trusted not to trap + + NAME (input) CHARACTER*(*) + The name of the calling subroutine, in either upper case or + lower case. + + OPTS (input) CHARACTER*(*) + The character options to the subroutine NAME, concatenated + into a single character string. For example, UPLO = 'U', + TRANS = 'T', and DIAG = 'N' for a triangular routine would + be specified as OPTS = 'UTN'. + + N1 (input) INTEGER + N2 (input) INTEGER + N3 (input) INTEGER + N4 (input) INTEGER + Problem dimensions for the subroutine NAME; these may not all + be required. + + (ILAENV) (output) INTEGER + >= 0: the value of the parameter specified by ISPEC + < 0: if ILAENV = -k, the k-th argument had an illegal value. + + Further Details + =============== + + The following conventions have been used when calling ILAENV from the + LAPACK routines: + 1) OPTS is a concatenation of all of the character options to + subroutine NAME, in the same order that they appear in the + argument list for NAME, even if they are not used in determining + the value of the parameter specified by ISPEC. + 2) The problem dimensions N1, N2, N3, N4 are specified in the order + that they appear in the argument list for NAME. N1 is used + first, N2 second, and so on, and unused problem dimensions are + passed a value of -1. + 3) The parameter value returned by ILAENV is checked for validity in + the calling subroutine. 
For example, ILAENV is used to retrieve + the optimal blocksize for STRTRI as follows: + + NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 ) NB = MAX( 1, N ) + + ===================================================================== +*/ + + + switch (*ispec) { + case 1: goto L100; + case 2: goto L100; + case 3: goto L100; + case 4: goto L400; + case 5: goto L500; + case 6: goto L600; + case 7: goto L700; + case 8: goto L800; + case 9: goto L900; + case 10: goto L1000; + case 11: goto L1100; + } + +/* Invalid value for ISPEC */ + + ret_val = -1; + return ret_val; + +L100: + +/* Convert NAME to upper case if the first character is lower case. */ + + ret_val = 1; + s_copy(subnam, name__, (ftnlen)6, name_len); + ic = *(unsigned char *)subnam; + iz = 'Z'; + if (iz == 90 || iz == 122) { + +/* ASCII character set */ + + if (ic >= 97 && ic <= 122) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 97 && ic <= 122) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L10: */ + } + } + + } else if (iz == 233 || iz == 169) { + +/* EBCDIC character set */ + + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && + ic <= 169) { + *(unsigned char *)subnam = (char) (ic + 64); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= + 162 && ic <= 169) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); + } +/* L20: */ + } + } + + } else if (iz == 218 || iz == 250) { + +/* Prime machines: ASCII+128 */ + + if (ic >= 225 && ic <= 250) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 225 && ic <= 250) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L30: */ + } + } + } + + *(unsigned char *)c1 = *(unsigned char *)subnam; + sname = *(unsigned char *)c1 
== 'S' || *(unsigned char *)c1 == 'D'; + cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; + if (! (cname || sname)) { + return ret_val; + } + s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2); + s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3); + s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2); + + switch (*ispec) { + case 1: goto L110; + case 2: goto L200; + case 3: goto L300; + } + +L110: + +/* + ISPEC = 1: block size + + In these examples, separate code is provided for setting NB for + real and complex. We assume that NB will take the same value in + single or double precision. +*/ + + nb = 1; + + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, + "RQF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) + 3, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) + == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "PO", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (sname && s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } 
else if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (s_cmp(c2, "GB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { 
+ if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "PB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "TR", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "LA", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "UUM", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (sname && s_cmp(c2, "ST", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "EBZ", (ftnlen)3, (ftnlen)3) == 0) { + nb = 1; + } + } + ret_val = nb; + return ret_val; + +L200: + +/* ISPEC = 2: minimum block size */ + + nbmin = 2; + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 8; + } else { + nbmin = 8; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + 
nbmin = 2; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } + ret_val = nbmin; + return ret_val; + +L300: + +/* ISPEC = 3: crossover point */ + + nx = 0; + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)3, (ftnlen)3) 
== 0 || s_cmp(c3, "LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } + ret_val = nx; + return ret_val; + +L400: + +/* ISPEC = 4: number of shifts (used by xHSEQR) */ + + ret_val = 6; + return ret_val; + +L500: + +/* ISPEC = 5: minimum column dimension (not used) */ + + ret_val = 2; + return ret_val; + +L600: + +/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ + + ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); + return ret_val; + +L700: + 
+/* ISPEC = 7: number of processors (not used) */ + + ret_val = 1; + return ret_val; + +L800: + +/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ + + ret_val = 50; + return ret_val; + +L900: + +/* + ISPEC = 9: maximum size of the subproblems at the bottom of the + computation tree in the divide-and-conquer algorithm + (used by xGELSD and xGESDD) +*/ + + ret_val = 25; + return ret_val; + +L1000: + +/* + ISPEC = 10: ieee NaN arithmetic can be trusted not to trap + + ILAENV = 0 +*/ + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__0, &c_b1101, &c_b871); + } + return ret_val; + +L1100: + +/* + ISPEC = 11: infinity arithmetic can be trusted not to trap + + ILAENV = 0 +*/ + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__1, &c_b1101, &c_b871); + } + return ret_val; + +/* End of ILAENV */ + +} /* ilaenv_ */ + +/* Subroutine */ int sbdsdc_(char *uplo, char *compq, integer *n, real *d__, + real *e, real *u, integer *ldu, real *vt, integer *ldvt, real *q, + integer *iq, real *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; + real r__1; + + /* Builtin functions */ + double r_sign(real *, real *), log(doublereal); + + /* Local variables */ + static integer i__, j, k; + static real p, r__; + static integer z__, ic, ii, kk; + static real cs; + static integer is, iu; + static real sn; + static integer nm1; + static real eps; + static integer ivt, difl, difr, ierr, perm, mlvl, sqre; + extern logical lsame_(char *, char *); + static integer poles; + extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, + integer *, real *, real *, real *, integer *); + static integer iuplo, nsize, start; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), sswap_(integer *, real *, integer *, real *, integer * + ), slasd0_(integer *, integer *, real *, real *, real *, integer * + , real *, integer *, integer *, integer *, real 
*, integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int slasda_(integer *, integer *, integer *, + integer *, real *, real *, real *, integer *, real *, integer *, + real *, real *, real *, real *, integer *, integer *, integer *, + integer *, real *, real *, real *, real *, integer *, integer *), + xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + static integer givcol; + extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer + *, integer *, integer *, real *, real *, real *, integer *, real * + , integer *, real *, integer *, real *, integer *); + static integer icompq; + extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, + real *, real *, integer *), slartg_(real *, real *, real * + , real *, real *); + static real orgnrm; + static integer givnum; + extern doublereal slanst_(char *, integer *, real *, real *); + static integer givptr, qstart, smlsiz, wstart, smlszp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + December 1, 1999 + + + Purpose + ======= + + SBDSDC computes the singular value decomposition (SVD) of a real + N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, + using a divide and conquer method, where S is a diagonal matrix + with non-negative diagonal elements (the singular values of B), and + U and VT are orthogonal matrices of left and right singular vectors, + respectively. SBDSDC can be used to compute all singular values, + and optionally, singular vectors or singular vectors in compact form. + + This code makes very mild assumptions about floating point + arithmetic. 
It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See SLASD3 for details. + + The code currently call SLASDQ if singular values only are desired. + However, it can be slightly modified to compute singular values + using the divide and conquer method. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': B is upper bidiagonal. + = 'L': B is lower bidiagonal. + + COMPQ (input) CHARACTER*1 + Specifies whether singular vectors are to be computed + as follows: + = 'N': Compute singular values only; + = 'P': Compute singular values and compute singular + vectors in compact form; + = 'I': Compute singular values and singular vectors. + + N (input) INTEGER + The order of the matrix B. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B. + + E (input/output) REAL array, dimension (N) + On entry, the elements of E contain the offdiagonal + elements of the bidiagonal matrix whose SVD is desired. + On exit, E has been destroyed. + + U (output) REAL array, dimension (LDU,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, U contains the left singular vectors + of the bidiagonal matrix. + For other values of COMPQ, U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1. + If singular vectors are desired, then LDU >= max( 1, N ). + + VT (output) REAL array, dimension (LDVT,N) + If COMPQ = 'I', then: + On exit, if INFO = 0, VT' contains the right singular + vectors of the bidiagonal matrix. + For other values of COMPQ, VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1. + If singular vectors are desired, then LDVT >= max( 1, N ). 
+ + Q (output) REAL array, dimension (LDQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. + In particular, Q contains all the REAL data in + LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, Q is not referenced. + + IQ (output) INTEGER array, dimension (LDIQ) + If COMPQ = 'P', then: + On exit, if INFO = 0, Q and IQ contain the left + and right singular vectors in a compact form, + requiring O(N log N) space instead of 2*N**2. + In particular, IQ contains all INTEGER data in + LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) + words of memory, where SMLSIZ is returned by ILAENV and + is equal to the maximum size of the subproblems at the + bottom of the computation tree (usually about 25). + For other values of COMPQ, IQ is not referenced. + + WORK (workspace) REAL array, dimension (LWORK) + If COMPQ = 'N' then LWORK >= (4 * N). + If COMPQ = 'P' then LWORK >= (6 * N). + If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). + + IWORK (workspace) INTEGER array, dimension (8*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an singular value. + The update process of divide and conquer failed. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + --e; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --q; + --iq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + iuplo = 0; + if (lsame_(uplo, "U")) { + iuplo = 1; + } + if (lsame_(uplo, "L")) { + iuplo = 2; + } + if (lsame_(compq, "N")) { + icompq = 0; + } else if (lsame_(compq, "P")) { + icompq = 1; + } else if (lsame_(compq, "I")) { + icompq = 2; + } else { + icompq = -1; + } + if (iuplo == 0) { + *info = -1; + } else if (icompq < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldu < 1 || icompq == 2 && *ldu < *n) { + *info = -7; + } else if (*ldvt < 1 || icompq == 2 && *ldvt < *n) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SBDSDC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + smlsiz = ilaenv_(&c__9, "SBDSDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n == 1) { + if (icompq == 1) { + q[1] = r_sign(&c_b871, &d__[1]); + q[smlsiz * *n + 1] = 1.f; + } else if (icompq == 2) { + u[u_dim1 + 1] = r_sign(&c_b871, &d__[1]); + vt[vt_dim1 + 1] = 1.f; + } + d__[1] = dabs(d__[1]); + return 0; + } + nm1 = *n - 1; + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left +*/ + + wstart = 1; + qstart = 3; + if (icompq == 1) { + scopy_(n, &d__[1], &c__1, &q[1], &c__1); + i__1 = *n - 1; + scopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1); + } + if (iuplo == 2) { + qstart = 5; + wstart = (*n << 1) - 1; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + if (icompq == 1) { + q[i__ + (*n << 1)] = cs; + q[i__ + *n * 3] = sn; + } else if (icompq == 2) { + work[i__] = cs; + work[nm1 + i__] = -sn; + } +/* L10: */ + } + } + +/* If ICOMPQ = 0, use 
SLASDQ to compute the singular values. */ + + if (icompq == 0) { + slasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + goto L40; + } + +/* + If N is smaller than the minimum divide size SMLSIZ, then solve + the problem with another solver. +*/ + + if (*n <= smlsiz) { + if (icompq == 2) { + slaset_("A", n, n, &c_b1101, &c_b871, &u[u_offset], ldu); + slaset_("A", n, n, &c_b1101, &c_b871, &vt[vt_offset], ldvt); + slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[ + wstart], info); + } else if (icompq == 1) { + iu = 1; + ivt = iu + *n; + slaset_("A", n, n, &c_b1101, &c_b871, &q[iu + (qstart - 1) * *n], + n); + slaset_("A", n, n, &c_b1101, &c_b871, &q[ivt + (qstart - 1) * *n], + n); + slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + ( + qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[ + iu + (qstart - 1) * *n], n, &work[wstart], info); + } + goto L40; + } + + if (icompq == 2) { + slaset_("A", n, n, &c_b1101, &c_b871, &u[u_offset], ldu); + slaset_("A", n, n, &c_b1101, &c_b871, &vt[vt_offset], ldvt) + ; + } + +/* Scale. 
*/ + + orgnrm = slanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.f) { + return 0; + } + slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, n, &c__1, &d__[1], n, &ierr); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &nm1, &c__1, &e[1], &nm1, & + ierr); + + eps = slamch_("Epsilon"); + + mlvl = (integer) (log((real) (*n) / (real) (smlsiz + 1)) / log(2.f)) + 1; + smlszp = smlsiz + 1; + + if (icompq == 1) { + iu = 1; + ivt = smlsiz + 1; + difl = ivt + smlszp; + difr = difl + mlvl; + z__ = difr + (mlvl << 1); + ic = z__ + mlvl; + is = ic + 1; + poles = is + 1; + givnum = poles + (mlvl << 1); + + k = 1; + givptr = 2; + perm = 3; + givcol = perm + mlvl; + } + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = d__[i__], dabs(r__1)) < eps) { + d__[i__] = r_sign(&eps, &d__[i__]); + } +/* L20: */ + } + + start = 1; + sqre = 0; + + i__1 = nm1; + for (i__ = 1; i__ <= i__1; ++i__) { + if ((r__1 = e[i__], dabs(r__1)) < eps || i__ == nm1) { + +/* + Subproblem found. First determine its size and then + apply divide and conquer on it. +*/ + + if (i__ < nm1) { + +/* A subproblem with E(I) small for I < NM1. */ + + nsize = i__ - start + 1; + } else if ((r__1 = e[i__], dabs(r__1)) >= eps) { + +/* A subproblem with E(NM1) not too small but I = NM1. */ + + nsize = *n - start + 1; + } else { + +/* + A subproblem with E(NM1) small. This implies an + 1-by-1 subproblem at D(N). Solve this 1-by-1 problem + first. 
+*/ + + nsize = i__ - start + 1; + if (icompq == 2) { + u[*n + *n * u_dim1] = r_sign(&c_b871, &d__[*n]); + vt[*n + *n * vt_dim1] = 1.f; + } else if (icompq == 1) { + q[*n + (qstart - 1) * *n] = r_sign(&c_b871, &d__[*n]); + q[*n + (smlsiz + qstart - 1) * *n] = 1.f; + } + d__[*n] = (r__1 = d__[*n], dabs(r__1)); + } + if (icompq == 2) { + slasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start + + start * u_dim1], ldu, &vt[start + start * vt_dim1], + ldvt, &smlsiz, &iwork[1], &work[wstart], info); + } else { + slasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[ + start], &q[start + (iu + qstart - 2) * *n], n, &q[ + start + (ivt + qstart - 2) * *n], &iq[start + k * *n], + &q[start + (difl + qstart - 2) * *n], &q[start + ( + difr + qstart - 2) * *n], &q[start + (z__ + qstart - + 2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[ + start + givptr * *n], &iq[start + givcol * *n], n, & + iq[start + perm * *n], &q[start + (givnum + qstart - + 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ + start + (is + qstart - 2) * *n], &work[wstart], & + iwork[1], info); + if (*info != 0) { + return 0; + } + } + start = i__ + 1; + } +/* L30: */ + } + +/* Unscale */ + + slascl_("G", &c__0, &c__0, &c_b871, &orgnrm, n, &c__1, &d__[1], n, &ierr); +L40: + +/* Use Selection Sort to minimize swaps of singular vectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + kk = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] > p) { + kk = j; + p = d__[j]; + } +/* L50: */ + } + if (kk != i__) { + d__[kk] = d__[i__]; + d__[i__] = p; + if (icompq == 1) { + iq[i__] = kk; + } else if (icompq == 2) { + sswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], & + c__1); + sswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt); + } + } else if (icompq == 1) { + iq[i__] = i__; + } +/* L60: */ + } + +/* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */ + + if (icompq == 1) { + if (iuplo == 1) { + iq[*n] = 1; + } else { + iq[*n] = 
0; + } + } + +/* + If B is lower bidiagonal, update U by those Givens rotations + which rotated B to be upper bidiagonal +*/ + + if (iuplo == 2 && icompq == 2) { + slasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu); + } + + return 0; + +/* End of SBDSDC */ + +} /* sbdsdc_ */ + +/* Subroutine */ int sbdsqr_(char *uplo, integer *n, integer *ncvt, integer * + nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real * + u, integer *ldu, real *c__, integer *ldc, real *work, integer *info) +{ + /* System generated locals */ + integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2; + real r__1, r__2, r__3, r__4; + doublereal d__1; + + /* Builtin functions */ + double pow_dd(doublereal *, doublereal *), sqrt(doublereal), r_sign(real * + , real *); + + /* Local variables */ + static real f, g, h__; + static integer i__, j, m; + static real r__, cs; + static integer ll; + static real sn, mu; + static integer nm1, nm12, nm13, lll; + static real eps, sll, tol, abse; + static integer idir; + static real abss; + static integer oldm; + static real cosl; + static integer isub, iter; + static real unfl, sinl, cosr, smin, smax, sinr; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), slas2_(real *, real *, real *, real *, + real *); + extern logical lsame_(char *, char *); + static real oldcs; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static integer oldll; + static real shift, sigmn, oldsn; + static integer maxit; + static real sminl; + extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, + integer *, real *, real *, real *, integer *); + static real sigmx; + static logical lower; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), slasq1_(integer *, real *, real *, real *, integer *), + slasv2_(real *, real *, real *, real *, real *, real *, real *, + real *, real *); + extern doublereal 
slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + static real sminoa; + extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * + ); + static real thresh; + static logical rotate; + static real sminlo, tolmul; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + SBDSQR computes the singular value decomposition (SVD) of a real + N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' + denotes the transpose of P), where S is a diagonal matrix with + non-negative diagonal elements (the singular values of B), and Q + and P are orthogonal matrices. + + The routine computes S, and optionally computes U * Q, P' * VT, + or Q' * C, for given real input matrices U, VT, and C. + + See "Computing Small Singular Values of Bidiagonal Matrices With + Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, + LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, + no. 5, pp. 873-912, Sept 1990) and + "Accurate singular values and differential qd algorithms," by + B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics + Department, University of California at Berkeley, July 1992 + for a detailed description of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': B is upper bidiagonal; + = 'L': B is lower bidiagonal. + + N (input) INTEGER + The order of the matrix B. N >= 0. + + NCVT (input) INTEGER + The number of columns of the matrix VT. NCVT >= 0. + + NRU (input) INTEGER + The number of rows of the matrix U. NRU >= 0. + + NCC (input) INTEGER + The number of columns of the matrix C. NCC >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the n diagonal elements of the bidiagonal matrix B. + On exit, if INFO=0, the singular values of B in decreasing + order. 
+ + E (input/output) REAL array, dimension (N) + On entry, the elements of E contain the + offdiagonal elements of the bidiagonal matrix whose SVD + is desired. On normal exit (INFO = 0), E is destroyed. + If the algorithm does not converge (INFO > 0), D and E + will contain the diagonal and superdiagonal elements of a + bidiagonal matrix orthogonally equivalent to the one given + as input. E(N) is used for workspace. + + VT (input/output) REAL array, dimension (LDVT, NCVT) + On entry, an N-by-NCVT matrix VT. + On exit, VT is overwritten by P' * VT. + VT is not referenced if NCVT = 0. + + LDVT (input) INTEGER + The leading dimension of the array VT. + LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. + + U (input/output) REAL array, dimension (LDU, N) + On entry, an NRU-by-N matrix U. + On exit, U is overwritten by U * Q. + U is not referenced if NRU = 0. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= max(1,NRU). + + C (input/output) REAL array, dimension (LDC, NCC) + On entry, an N-by-NCC matrix C. + On exit, C is overwritten by Q' * C. + C is not referenced if NCC = 0. + + LDC (input) INTEGER + The leading dimension of the array C. + LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. + + WORK (workspace) REAL array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit + < 0: If INFO = -i, the i-th argument had an illegal value + > 0: the algorithm did not converge; D and E contain the + elements of a bidiagonal matrix which is orthogonally + similar to the input matrix B; if INFO = i, i + elements of E have not converged to zero. + + Internal Parameters + =================== + + TOLMUL REAL, default = max(10,min(100,EPS**(-1/8))) + TOLMUL controls the convergence criterion of the QR loop. + If it is positive, TOLMUL*EPS is the desired relative + precision in the computed singular values. 
+ If it is negative, abs(TOLMUL*EPS*sigma_max) is the + desired absolute accuracy in the computed singular + values (corresponds to relative accuracy + abs(TOLMUL*EPS) in the largest singular value. + abs(TOLMUL) should be between 1 and 1/EPS, and preferably + between 10 (for fast convergence) and .1/EPS + (for there to be some accuracy in the results). + Default is to lose at either one eighth or 2 of the + available decimal digits in each computed singular value + (whichever is smaller). + + MAXITR INTEGER, default = 6 + MAXITR controls the maximum number of passes of the + algorithm through its inner loop. The algorithms stops + (and so fails to converge) if the number of passes + through the inner loop exceeds MAXITR*N**2. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + lower = lsame_(uplo, "L"); + if (! lsame_(uplo, "U") && ! lower) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ncvt < 0) { + *info = -3; + } else if (*nru < 0) { + *info = -4; + } else if (*ncc < 0) { + *info = -5; + } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < max(1,*n)) { + *info = -9; + } else if (*ldu < max(1,*nru)) { + *info = -11; + } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < max(1,*n)) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SBDSQR", &i__1); + return 0; + } + if (*n == 0) { + return 0; + } + if (*n == 1) { + goto L160; + } + +/* ROTATE is true if any singular vectors desired, false otherwise */ + + rotate = *ncvt > 0 || *nru > 0 || *ncc > 0; + +/* If no singular vectors desired, use qd algorithm */ + + if (! 
rotate) { + slasq1_(n, &d__[1], &e[1], &work[1], info); + return 0; + } + + nm1 = *n - 1; + nm12 = nm1 + nm1; + nm13 = nm12 + nm1; + idir = 0; + +/* Get machine constants */ + + eps = slamch_("Epsilon"); + unfl = slamch_("Safe minimum"); + +/* + If matrix lower bidiagonal, rotate to be upper bidiagonal + by applying Givens rotations on the left +*/ + + if (lower) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + d__[i__] = r__; + e[i__] = sn * d__[i__ + 1]; + d__[i__ + 1] = cs * d__[i__ + 1]; + work[i__] = cs; + work[nm1 + i__] = sn; +/* L10: */ + } + +/* Update singular vectors if desired */ + + if (*nru > 0) { + slasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset], + ldu); + } + if (*ncc > 0) { + slasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset], + ldc); + } + } + +/* + Compute singular values to relative accuracy TOL + (By setting TOL to be negative, algorithm will compute + singular values to absolute accuracy ABS(TOL)*norm(input matrix)) + + Computing MAX + Computing MIN +*/ + d__1 = (doublereal) eps; + r__3 = 100.f, r__4 = pow_dd(&d__1, &c_b2532); + r__1 = 10.f, r__2 = dmin(r__3,r__4); + tolmul = dmax(r__1,r__2); + tol = tolmul * eps; + +/* Compute approximate maximum, minimum singular values */ + + smax = 0.f; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__2 = smax, r__3 = (r__1 = d__[i__], dabs(r__1)); + smax = dmax(r__2,r__3); +/* L20: */ + } + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__2 = smax, r__3 = (r__1 = e[i__], dabs(r__1)); + smax = dmax(r__2,r__3); +/* L30: */ + } + sminl = 0.f; + if (tol >= 0.f) { + +/* Relative accuracy desired */ + + sminoa = dabs(d__[1]); + if (sminoa == 0.f) { + goto L50; + } + mu = sminoa; + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + mu = (r__2 = d__[i__], dabs(r__2)) * (mu / (mu + (r__1 = e[i__ - + 1], dabs(r__1)))); + sminoa = dmin(sminoa,mu); + if (sminoa == 0.f) { + 
goto L50; + } +/* L40: */ + } +L50: + sminoa /= sqrt((real) (*n)); +/* Computing MAX */ + r__1 = tol * sminoa, r__2 = *n * 6 * *n * unfl; + thresh = dmax(r__1,r__2); + } else { + +/* + Absolute accuracy desired + + Computing MAX +*/ + r__1 = dabs(tol) * smax, r__2 = *n * 6 * *n * unfl; + thresh = dmax(r__1,r__2); + } + +/* + Prepare for main iteration loop for the singular values + (MAXIT is the maximum number of passes through the inner + loop permitted before nonconvergence signalled.) +*/ + + maxit = *n * 6 * *n; + iter = 0; + oldll = -1; + oldm = -1; + +/* M points to last element of unconverged part of matrix */ + + m = *n; + +/* Begin main iteration loop */ + +L60: + +/* Check for convergence or exceeding iteration count */ + + if (m <= 1) { + goto L160; + } + if (iter > maxit) { + goto L200; + } + +/* Find diagonal block of matrix to work on */ + + if (tol < 0.f && (r__1 = d__[m], dabs(r__1)) <= thresh) { + d__[m] = 0.f; + } + smax = (r__1 = d__[m], dabs(r__1)); + smin = smax; + i__1 = m - 1; + for (lll = 1; lll <= i__1; ++lll) { + ll = m - lll; + abss = (r__1 = d__[ll], dabs(r__1)); + abse = (r__1 = e[ll], dabs(r__1)); + if (tol < 0.f && abss <= thresh) { + d__[ll] = 0.f; + } + if (abse <= thresh) { + goto L80; + } + smin = dmin(smin,abss); +/* Computing MAX */ + r__1 = max(smax,abss); + smax = dmax(r__1,abse); +/* L70: */ + } + ll = 0; + goto L90; +L80: + e[ll] = 0.f; + +/* Matrix splits since E(LL) = 0 */ + + if (ll == m - 1) { + +/* Convergence of bottom singular value, return to top of loop */ + + --m; + goto L60; + } +L90: + ++ll; + +/* E(LL) through E(M-1) are nonzero, E(LL-1) is zero */ + + if (ll == m - 1) { + +/* 2 by 2 block, handle separately */ + + slasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr, + &sinl, &cosl); + d__[m - 1] = sigmx; + e[m - 1] = 0.f; + d__[m] = sigmn; + +/* Compute singular vectors, if desired */ + + if (*ncvt > 0) { + srot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, & + cosr, &sinr); + } + 
if (*nru > 0) { + srot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], & + c__1, &cosl, &sinl); + } + if (*ncc > 0) { + srot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, & + cosl, &sinl); + } + m += -2; + goto L60; + } + +/* + If working on new submatrix, choose shift direction + (from larger end diagonal element towards smaller) +*/ + + if (ll > oldm || m < oldll) { + if ((r__1 = d__[ll], dabs(r__1)) >= (r__2 = d__[m], dabs(r__2))) { + +/* Chase bulge from top (big end) to bottom (small end) */ + + idir = 1; + } else { + +/* Chase bulge from bottom (big end) to top (small end) */ + + idir = 2; + } + } + +/* Apply convergence tests */ + + if (idir == 1) { + +/* + Run convergence test in forward direction + First apply standard test to bottom of matrix +*/ + + if ((r__2 = e[m - 1], dabs(r__2)) <= dabs(tol) * (r__1 = d__[m], dabs( + r__1)) || tol < 0.f && (r__3 = e[m - 1], dabs(r__3)) <= + thresh) { + e[m - 1] = 0.f; + goto L60; + } + + if (tol >= 0.f) { + +/* + If relative accuracy desired, + apply convergence criterion forward +*/ + + mu = (r__1 = d__[ll], dabs(r__1)); + sminl = mu; + i__1 = m - 1; + for (lll = ll; lll <= i__1; ++lll) { + if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) { + e[lll] = 0.f; + goto L60; + } + sminlo = sminl; + mu = (r__2 = d__[lll + 1], dabs(r__2)) * (mu / (mu + (r__1 = + e[lll], dabs(r__1)))); + sminl = dmin(sminl,mu); +/* L100: */ + } + } + + } else { + +/* + Run convergence test in backward direction + First apply standard test to top of matrix +*/ + + if ((r__2 = e[ll], dabs(r__2)) <= dabs(tol) * (r__1 = d__[ll], dabs( + r__1)) || tol < 0.f && (r__3 = e[ll], dabs(r__3)) <= thresh) { + e[ll] = 0.f; + goto L60; + } + + if (tol >= 0.f) { + +/* + If relative accuracy desired, + apply convergence criterion backward +*/ + + mu = (r__1 = d__[m], dabs(r__1)); + sminl = mu; + i__1 = ll; + for (lll = m - 1; lll >= i__1; --lll) { + if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) { + e[lll] = 0.f; + goto L60; + } + sminlo = 
sminl; + mu = (r__2 = d__[lll], dabs(r__2)) * (mu / (mu + (r__1 = e[ + lll], dabs(r__1)))); + sminl = dmin(sminl,mu); +/* L110: */ + } + } + } + oldll = ll; + oldm = m; + +/* + Compute shift. First, test if shifting would ruin relative + accuracy, and if so set the shift to zero. + + Computing MAX +*/ + r__1 = eps, r__2 = tol * .01f; + if (tol >= 0.f && *n * tol * (sminl / smax) <= dmax(r__1,r__2)) { + +/* Use a zero shift to avoid loss of relative accuracy */ + + shift = 0.f; + } else { + +/* Compute the shift from 2-by-2 block at end of matrix */ + + if (idir == 1) { + sll = (r__1 = d__[ll], dabs(r__1)); + slas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__); + } else { + sll = (r__1 = d__[m], dabs(r__1)); + slas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__); + } + +/* Test if shift negligible, and if so set to zero */ + + if (sll > 0.f) { +/* Computing 2nd power */ + r__1 = shift / sll; + if (r__1 * r__1 < eps) { + shift = 0.f; + } + } + } + +/* Increment iteration count */ + + iter = iter + m - ll; + +/* If SHIFT = 0, do simplified QR iteration */ + + if (shift == 0.f) { + if (idir == 1) { + +/* + Chase bulge from top to bottom + Save cosines and sines for later singular vector updates +*/ + + cs = 1.f; + oldcs = 1.f; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + r__1 = d__[i__] * cs; + slartg_(&r__1, &e[i__], &cs, &sn, &r__); + if (i__ > ll) { + e[i__ - 1] = oldsn * r__; + } + r__1 = oldcs * r__; + r__2 = d__[i__ + 1] * sn; + slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll + 1] = cs; + work[i__ - ll + 1 + nm1] = sn; + work[i__ - ll + 1 + nm12] = oldcs; + work[i__ - ll + 1 + nm13] = oldsn; +/* L120: */ + } + h__ = d__[m] * cs; + d__[m] = h__ * oldcs; + e[m - 1] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], 
&work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { + e[m - 1] = 0.f; + } + + } else { + +/* + Chase bulge from bottom to top + Save cosines and sines for later singular vector updates +*/ + + cs = 1.f; + oldcs = 1.f; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + r__1 = d__[i__] * cs; + slartg_(&r__1, &e[i__ - 1], &cs, &sn, &r__); + if (i__ < m) { + e[i__] = oldsn * r__; + } + r__1 = oldcs * r__; + r__2 = d__[i__ - 1] * sn; + slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]); + work[i__ - ll] = cs; + work[i__ - ll + nm1] = -sn; + work[i__ - ll + nm12] = oldcs; + work[i__ - ll + nm13] = -oldsn; +/* L130: */ + } + h__ = d__[ll] * cs; + d__[ll] = h__ * oldcs; + e[ll] = h__ * oldsn; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((r__1 = e[ll], dabs(r__1)) <= thresh) { + e[ll] = 0.f; + } + } + } else { + +/* Use nonzero shift */ + + if (idir == 1) { + +/* + Chase bulge from top to bottom + Save cosines and sines for later singular vector updates +*/ + + f = ((r__1 = d__[ll], dabs(r__1)) - shift) * (r_sign(&c_b871, & + d__[ll]) + shift / d__[ll]); + g = e[ll]; + i__1 = m - 1; + for (i__ = ll; i__ <= i__1; ++i__) { + slartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ > ll) { + e[i__ - 1] = r__; + } + f = cosr * d__[i__] + sinr * e[i__]; + e[i__] = cosr * e[i__] - sinr * d__[i__]; + g = sinr * d__[i__ + 1]; + d__[i__ + 1] = cosr * d__[i__ + 
1]; + slartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__] + sinl * d__[i__ + 1]; + d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__]; + if (i__ < m - 1) { + g = sinl * e[i__ + 1]; + e[i__ + 1] = cosl * e[i__ + 1]; + } + work[i__ - ll + 1] = cosr; + work[i__ - ll + 1 + nm1] = sinr; + work[i__ - ll + 1 + nm12] = cosl; + work[i__ - ll + 1 + nm13] = sinl; +/* L140: */ + } + e[m - 1] = f; + +/* Update singular vectors */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[ + ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13 + + 1], &u[ll * u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13 + + 1], &c__[ll + c_dim1], ldc); + } + +/* Test convergence */ + + if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) { + e[m - 1] = 0.f; + } + + } else { + +/* + Chase bulge from bottom to top + Save cosines and sines for later singular vector updates +*/ + + f = ((r__1 = d__[m], dabs(r__1)) - shift) * (r_sign(&c_b871, &d__[ + m]) + shift / d__[m]); + g = e[m - 1]; + i__1 = ll + 1; + for (i__ = m; i__ >= i__1; --i__) { + slartg_(&f, &g, &cosr, &sinr, &r__); + if (i__ < m) { + e[i__] = r__; + } + f = cosr * d__[i__] + sinr * e[i__ - 1]; + e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__]; + g = sinr * d__[i__ - 1]; + d__[i__ - 1] = cosr * d__[i__ - 1]; + slartg_(&f, &g, &cosl, &sinl, &r__); + d__[i__] = r__; + f = cosl * e[i__ - 1] + sinl * d__[i__ - 1]; + d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1]; + if (i__ > ll + 1) { + g = sinl * e[i__ - 2]; + e[i__ - 2] = cosl * e[i__ - 2]; + } + work[i__ - ll] = cosr; + work[i__ - ll + nm1] = -sinr; + work[i__ - ll + nm12] = cosl; + work[i__ - ll + nm13] = -sinl; +/* L150: */ + } + e[ll] = f; + +/* Test convergence */ + + if ((r__1 = e[ll], dabs(r__1)) <= thresh) { + e[ll] = 0.f; + } + +/* Update singular vectors 
if desired */ + + if (*ncvt > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[ + nm13 + 1], &vt[ll + vt_dim1], ldvt); + } + if (*nru > 0) { + i__1 = m - ll + 1; + slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll * + u_dim1 + 1], ldu); + } + if (*ncc > 0) { + i__1 = m - ll + 1; + slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[ + ll + c_dim1], ldc); + } + } + } + +/* QR iteration finished, go back and check convergence */ + + goto L60; + +/* All singular values converged, so make them positive */ + +L160: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (d__[i__] < 0.f) { + d__[i__] = -d__[i__]; + +/* Change sign of singular vectors, if desired */ + + if (*ncvt > 0) { + sscal_(ncvt, &c_b1150, &vt[i__ + vt_dim1], ldvt); + } + } +/* L170: */ + } + +/* + Sort the singular values into decreasing order (insertion sort on + singular values, but only one transposition per singular vector) +*/ + + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Scan for smallest D(I) */ + + isub = 1; + smin = d__[1]; + i__2 = *n + 1 - i__; + for (j = 2; j <= i__2; ++j) { + if (d__[j] <= smin) { + isub = j; + smin = d__[j]; + } +/* L180: */ + } + if (isub != *n + 1 - i__) { + +/* Swap singular values and vectors */ + + d__[isub] = d__[*n + 1 - i__]; + d__[*n + 1 - i__] = smin; + if (*ncvt > 0) { + sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ + + vt_dim1], ldvt); + } + if (*nru > 0) { + sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) * + u_dim1 + 1], &c__1); + } + if (*ncc > 0) { + sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ + + c_dim1], ldc); + } + } +/* L190: */ + } + goto L220; + +/* Maximum number of iterations exceeded, failure to converge */ + +L200: + *info = 0; + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.f) { + ++(*info); + } +/* L210: */ + } +L220: + return 0; + +/* End of SBDSQR */ + +} /* sbdsqr_ */ + +/* Subroutine */ int sgebak_(char 
*job, char *side, integer *n, integer *ilo, + integer *ihi, real *scale, integer *m, real *v, integer *ldv, integer + *info) +{ + /* System generated locals */ + integer v_dim1, v_offset, i__1; + + /* Local variables */ + static integer i__, k; + static real s; + static integer ii; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static logical leftv; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), xerbla_(char *, integer *); + static logical rightv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + SGEBAK forms the right or left eigenvectors of a real general matrix + by backward transformation on the computed eigenvectors of the + balanced matrix output by SGEBAL. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the type of backward transformation required: + = 'N', do nothing, return immediately; + = 'P', do backward transformation for permutation only; + = 'S', do backward transformation for scaling only; + = 'B', do backward transformations for both permutation and + scaling. + JOB must be the same as the argument JOB supplied to SGEBAL. + + SIDE (input) CHARACTER*1 + = 'R': V contains right eigenvectors; + = 'L': V contains left eigenvectors. + + N (input) INTEGER + The number of rows of the matrix V. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + The integers ILO and IHI determined by SGEBAL. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + SCALE (input) REAL array, dimension (N) + Details of the permutation and scaling factors, as returned + by SGEBAL. + + M (input) INTEGER + The number of columns of the matrix V. M >= 0. 
+ + V (input/output) REAL array, dimension (LDV,M) + On entry, the matrix of right or left eigenvectors to be + transformed, as returned by SHSEIN or STREVC. + On exit, V is overwritten by the transformed eigenvectors. + + LDV (input) INTEGER + The leading dimension of the array V. LDV >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + ===================================================================== + + + Decode and Test the input parameters +*/ + + /* Parameter adjustments */ + --scale; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + + /* Function Body */ + rightv = lsame_(side, "R"); + leftv = lsame_(side, "L"); + + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (! rightv && ! leftv) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*m < 0) { + *info = -7; + } else if (*ldv < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEBAK", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*m == 0) { + return 0; + } + if (lsame_(job, "N")) { + return 0; + } + + if (*ilo == *ihi) { + goto L30; + } + +/* Backward balance */ + + if (lsame_(job, "S") || lsame_(job, "B")) { + + if (rightv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = scale[i__]; + sscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L10: */ + } + } + + if (leftv) { + i__1 = *ihi; + for (i__ = *ilo; i__ <= i__1; ++i__) { + s = 1.f / scale[i__]; + sscal_(m, &s, &v[i__ + v_dim1], ldv); +/* L20: */ + } + } + + } + +/* + Backward permutation + + For I = ILO-1 step -1 until 1, + IHI+1 step 1 until N do -- +*/ + +L30: + if (lsame_(job, "P") || lsame_(job, "B")) { + if (rightv) { + i__1 = *n; + for (ii = 1; ii <= 
i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L40; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L40; + } + sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L40: + ; + } + } + + if (leftv) { + i__1 = *n; + for (ii = 1; ii <= i__1; ++ii) { + i__ = ii; + if (i__ >= *ilo && i__ <= *ihi) { + goto L50; + } + if (i__ < *ilo) { + i__ = *ilo - ii; + } + k = scale[i__]; + if (k == i__) { + goto L50; + } + sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv); +L50: + ; + } + } + } + + return 0; + +/* End of SGEBAK */ + +} /* sgebak_ */ + +/* Subroutine */ int sgebal_(char *job, integer *n, real *a, integer *lda, + integer *ilo, integer *ihi, real *scale, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real r__1, r__2; + + /* Local variables */ + static real c__, f, g; + static integer i__, j, k, l, m; + static real r__, s, ca, ra; + static integer ica, ira, iexc; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sswap_(integer *, real *, integer *, real *, integer *); + static real sfmin1, sfmin2, sfmax1, sfmax2; + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer isamax_(integer *, real *, integer *); + static logical noconv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SGEBAL balances a general real matrix A. This involves, first, + permuting A by a similarity transformation to isolate eigenvalues + in the first 1 to ILO-1 and last IHI+1 to N elements on the + diagonal; and second, applying a diagonal similarity transformation + to rows and columns ILO to IHI to make the rows and columns as + close in norm as possible. Both steps are optional. 
+ + Balancing may reduce the 1-norm of the matrix, and improve the + accuracy of the computed eigenvalues and/or eigenvectors. + + Arguments + ========= + + JOB (input) CHARACTER*1 + Specifies the operations to be performed on A: + = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 + for i = 1,...,N; + = 'P': permute only; + = 'S': scale only; + = 'B': both permute and scale. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the input matrix A. + On exit, A is overwritten by the balanced matrix. + If JOB = 'N', A is not referenced. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + ILO (output) INTEGER + IHI (output) INTEGER + ILO and IHI are set to integers such that on exit + A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. + If JOB = 'N' or 'S', ILO = 1 and IHI = N. + + SCALE (output) REAL array, dimension (N) + Details of the permutations and scaling factors applied to + A. If P(j) is the index of the row and column interchanged + with row and column j and D(j) is the scaling factor + applied to row and column j, then + SCALE(j) = P(j) for j = 1,...,ILO-1 + = D(j) for j = ILO,...,IHI + = P(j) for j = IHI+1,...,N. + The order in which the interchanges are made is N to IHI+1, + then 1 to ILO-1. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The permutations consist of row and column interchanges which put + the matrix in the form + + ( T1 X Y ) + P A P = ( 0 B Z ) + ( 0 0 T2 ) + + where T1 and T2 are upper triangular matrices whose eigenvalues lie + along the diagonal. The column indices ILO and IHI mark the starting + and ending columns of the submatrix B. 
Balancing consists of applying + a diagonal similarity transformation inv(D) * B * D to make the + 1-norms of each row of B and its corresponding column nearly equal. + The output matrix is + + ( T1 X*D Y ) + ( 0 inv(D)*B*D inv(D)*Z ). + ( 0 0 T2 ) + + Information about the permutations P and the diagonal matrix D is + returned in the vector SCALE. + + This subroutine is based on the EISPACK routine BALANC. + + Modified by Tzu-Yi Chen, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --scale; + + /* Function Body */ + *info = 0; + if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") + && ! lsame_(job, "B")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEBAL", &i__1); + return 0; + } + + k = 1; + l = *n; + + if (*n == 0) { + goto L210; + } + + if (lsame_(job, "N")) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L10: */ + } + goto L210; + } + + if (lsame_(job, "S")) { + goto L120; + } + +/* Permutation to isolate eigenvalues if possible */ + + goto L50; + +/* Row and column exchange. */ + +L20: + scale[m] = (real) j; + if (j == m) { + goto L30; + } + + sswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1); + i__1 = *n - k + 1; + sswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda); + +L30: + switch (iexc) { + case 1: goto L40; + case 2: goto L80; + } + +/* Search for rows isolating an eigenvalue and push them down. 
*/ + +L40: + if (l == 1) { + goto L210; + } + --l; + +L50: + for (j = l; j >= 1; --j) { + + i__1 = l; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ == j) { + goto L60; + } + if (a[j + i__ * a_dim1] != 0.f) { + goto L70; + } +L60: + ; + } + + m = l; + iexc = 1; + goto L20; +L70: + ; + } + + goto L90; + +/* Search for columns isolating an eigenvalue and push them left. */ + +L80: + ++k; + +L90: + i__1 = l; + for (j = k; j <= i__1; ++j) { + + i__2 = l; + for (i__ = k; i__ <= i__2; ++i__) { + if (i__ == j) { + goto L100; + } + if (a[i__ + j * a_dim1] != 0.f) { + goto L110; + } +L100: + ; + } + + m = k; + iexc = 2; + goto L20; +L110: + ; + } + +L120: + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + scale[i__] = 1.f; +/* L130: */ + } + + if (lsame_(job, "P")) { + goto L210; + } + +/* + Balance the submatrix in rows K to L. + + Iterative loop for norm reduction +*/ + + sfmin1 = slamch_("S") / slamch_("P"); + sfmax1 = 1.f / sfmin1; + sfmin2 = sfmin1 * 8.f; + sfmax2 = 1.f / sfmin2; +L140: + noconv = FALSE_; + + i__1 = l; + for (i__ = k; i__ <= i__1; ++i__) { + c__ = 0.f; + r__ = 0.f; + + i__2 = l; + for (j = k; j <= i__2; ++j) { + if (j == i__) { + goto L150; + } + c__ += (r__1 = a[j + i__ * a_dim1], dabs(r__1)); + r__ += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); +L150: + ; + } + ica = isamax_(&l, &a[i__ * a_dim1 + 1], &c__1); + ca = (r__1 = a[ica + i__ * a_dim1], dabs(r__1)); + i__2 = *n - k + 1; + ira = isamax_(&i__2, &a[i__ + k * a_dim1], lda); + ra = (r__1 = a[i__ + (ira + k - 1) * a_dim1], dabs(r__1)); + +/* Guard against zero C or R due to underflow. 
*/ + + if (c__ == 0.f || r__ == 0.f) { + goto L200; + } + g = r__ / 8.f; + f = 1.f; + s = c__ + r__; +L160: +/* Computing MAX */ + r__1 = max(f,c__); +/* Computing MIN */ + r__2 = min(r__,g); + if (c__ >= g || dmax(r__1,ca) >= sfmax2 || dmin(r__2,ra) <= sfmin2) { + goto L170; + } + f *= 8.f; + c__ *= 8.f; + ca *= 8.f; + r__ /= 8.f; + g /= 8.f; + ra /= 8.f; + goto L160; + +L170: + g = c__ / 8.f; +L180: +/* Computing MIN */ + r__1 = min(f,c__), r__1 = min(r__1,g); + if (g < r__ || dmax(r__,ra) >= sfmax2 || dmin(r__1,ca) <= sfmin2) { + goto L190; + } + f /= 8.f; + c__ /= 8.f; + g /= 8.f; + ca /= 8.f; + r__ *= 8.f; + ra *= 8.f; + goto L180; + +/* Now balance. */ + +L190: + if (c__ + r__ >= s * .95f) { + goto L200; + } + if (f < 1.f && scale[i__] < 1.f) { + if (f * scale[i__] <= sfmin1) { + goto L200; + } + } + if (f > 1.f && scale[i__] > 1.f) { + if (scale[i__] >= sfmax1 / f) { + goto L200; + } + } + g = 1.f / f; + scale[i__] *= f; + noconv = TRUE_; + + i__2 = *n - k + 1; + sscal_(&i__2, &g, &a[i__ + k * a_dim1], lda); + sscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1); + +L200: + ; + } + + if (noconv) { + goto L140; + } + +L210: + *ilo = k; + *ihi = l; + + return 0; + +/* End of SGEBAL */ + +} /* sgebal_ */ + +/* Subroutine */ int sgebd2_(integer *m, integer *n, real *a, integer *lda, + real *d__, real *e, real *tauq, real *taup, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SGEBD2 reduces a real general m by n matrix A to upper or lower + bidiagonal form B by an orthogonal transformation: Q' * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the orthogonal matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the orthogonal matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). + + E (output) REAL array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) REAL array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. 
+ + TAUP (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + WORK (workspace) REAL array, dimension (max(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); + u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). 
+ + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("SGEBD2", &i__1); + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *n) { + +/* + Generate elementary reflector G(i) to annihilate + A(i,i+2:n) +*/ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.f; + +/* Apply G(i) to A(i+1:m,i+1:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__; + slarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1], + lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + (i__ + 1) * a_dim1] = e[i__]; + } else { + taup[i__] = 0.f; + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector G(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* 
Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + +/* Apply G(i) to A(i+1:m,i:n) from the right */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; +/* Computing MIN */ + i__4 = i__ + 1; + slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ + i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = d__[i__]; + + if (i__ < *m) { + +/* + Generate elementary reflector H(i) to annihilate + A(i+2:m,i) +*/ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Apply H(i) to A(i+1:m,i+1:n) from the left */ + + i__2 = *m - i__; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], & + c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &work[1]); + a[i__ + 1 + i__ * a_dim1] = e[i__]; + } else { + tauq[i__] = 0.f; + } +/* L20: */ + } + } + return 0; + +/* End of SGEBD2 */ + +} /* sgebd2_ */ + +/* Subroutine */ int sgebrd_(integer *m, integer *n, real *a, integer *lda, + real *d__, real *e, real *tauq, real *taup, real *work, integer * + lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, j, nb, nx; + static real ws; + static integer nbmin, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer minmn; + extern /* Subroutine */ int sgebd2_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, real *, integer *), slabrd_( + integer *, integer *, integer *, real *, integer *, real *, real * + , real *, real *, real *, integer *, real 
*, integer *), xerbla_( + char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwrkx, ldwrky, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SGEBRD reduces a general real M-by-N matrix A to upper or lower + bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. + + If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. M >= 0. + + N (input) INTEGER + The number of columns in the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N general matrix to be reduced. + On exit, + if m >= n, the diagonal and the first superdiagonal are + overwritten with the upper bidiagonal matrix B; the + elements below the diagonal, with the array TAUQ, represent + the orthogonal matrix Q as a product of elementary + reflectors, and the elements above the first superdiagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors; + if m < n, the diagonal and the first subdiagonal are + overwritten with the lower bidiagonal matrix B; the + elements below the first subdiagonal, with the array TAUQ, + represent the orthogonal matrix Q as a product of + elementary reflectors, and the elements above the diagonal, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (min(M,N)) + The diagonal elements of the bidiagonal matrix B: + D(i) = A(i,i). 
+ + E (output) REAL array, dimension (min(M,N)-1) + The off-diagonal elements of the bidiagonal matrix B: + if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; + if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. + + TAUQ (output) REAL array dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,M,N). + For optimum performance LWORK >= (M+N)*NB, where NB + is the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + If m >= n, + + Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); + u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, + + Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . 
G(m) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors; + v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); + u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); + tauq is stored in TAUQ(i) and taup in TAUP(i). + + The contents of A on exit are illustrated by the following examples: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) + ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) + ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) + ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) + ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) + ( v1 v2 v3 v4 v5 ) + + where d and e denote diagonal and off-diagonal elements of B, vi + denotes an element of the vector defining H(i), and ui an element of + the vector defining G(i). + + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + --work; + + /* Function Body */ + *info = 0; +/* Computing MAX */ + i__1 = 1, i__2 = ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = max(i__1,i__2); + lwkopt = (*m + *n) * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MAX */ + i__1 = max(1,*m); + if (*lwork < max(i__1,*n) && ! lquery) { + *info = -10; + } + } + if (*info < 0) { + i__1 = -(*info); + xerbla_("SGEBRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + minmn = min(*m,*n); + if (minmn == 0) { + work[1] = 1.f; + return 0; + } + + ws = (real) max(*m,*n); + ldwrkx = *m; + ldwrky = *n; + + if (nb > 1 && nb < minmn) { + +/* + Set the crossover point NX. 
+ + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + +/* Determine when to switch from blocked to unblocked code. */ + + if (nx < minmn) { + ws = (real) ((*m + *n) * nb); + if ((real) (*lwork) < ws) { + +/* + Not enough work space for the optimal NB, consider using + a smaller block size. +*/ + + nbmin = ilaenv_(&c__2, "SGEBRD", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + if (*lwork >= (*m + *n) * nbmin) { + nb = *lwork / (*m + *n); + } else { + nb = 1; + nx = minmn; + } + } + } + } else { + nx = minmn; + } + + i__1 = minmn - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + +/* + Reduce rows and columns i:i+nb-1 to bidiagonal form and return + the matrices X and Y which are needed to update the unreduced + part of the matrix +*/ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ + 1; + slabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[ + i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx + * nb + 1], &ldwrky); + +/* + Update the trailing submatrix A(i+nb:m,i+nb:n), using an update + of the form A := A - V*Y' - X*U' +*/ + + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + sgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b1150, &a[ + i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], & + ldwrky, &c_b871, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + i__3 = *m - i__ - nb + 1; + i__4 = *n - i__ - nb + 1; + sgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b1150, & + work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & + c_b871, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + +/* Copy diagonal and off-diagonal elements of B back into A */ + + if (*m >= *n) { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * a_dim1] = d__[j]; + a[j + (j + 1) * a_dim1] = e[j]; +/* L10: */ + } + } else { + i__3 = i__ + nb - 1; + for (j = i__; j <= i__3; ++j) { + a[j + j * 
a_dim1] = d__[j]; + a[j + 1 + j * a_dim1] = e[j]; +/* L20: */ + } + } +/* L30: */ + } + +/* Use unblocked code to reduce the remainder of the matrix */ + + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + sgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], & + tauq[i__], &taup[i__], &work[1], &iinfo); + work[1] = ws; + return 0; + +/* End of SGEBRD */ + +} /* sgebrd_ */ + +/* Subroutine */ int sgeev_(char *jobvl, char *jobvr, integer *n, real *a, + integer *lda, real *wr, real *wi, real *vl, integer *ldvl, real *vr, + integer *ldvr, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4; + real r__1, r__2; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, k; + static real r__, cs, sn; + static integer ihi; + static real scl; + static integer ilo; + static real dum[1], eps; + static integer ibal; + static char side[1]; + static integer maxb; + static real anrm; + static integer ierr, itau, iwrk, nout; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + extern doublereal snrm2_(integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + extern doublereal slapy2_(real *, real *); + extern /* Subroutine */ int slabad_(real *, real *); + static logical scalea; + static real cscale; + extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *, + integer *, integer *, real *, integer *); + extern doublereal slamch_(char *), slange_(char *, integer *, + integer *, real *, integer *, real *); + extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), 
xerbla_(char + *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical select[1]; + static real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slartg_(real *, real *, + real *, real *, real *), sorghr_(integer *, integer *, integer *, + real *, integer *, real *, real *, integer *, integer *), shseqr_( + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, real *, real *, integer *, real *, integer *, integer *), strevc_(char *, char *, logical *, integer *, + real *, integer *, real *, integer *, real *, integer *, integer * + , integer *, real *, integer *); + static integer minwrk, maxwrk; + static logical wantvl; + static real smlnum; + static integer hswork; + static logical lquery, wantvr; + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + December 8, 1999 + + + Purpose + ======= + + SGEEV computes for an N-by-N real nonsymmetric matrix A, the + eigenvalues and, optionally, the left and/or right eigenvectors. + + The right eigenvector v(j) of A satisfies + A * v(j) = lambda(j) * v(j) + where lambda(j) is its eigenvalue. + The left eigenvector u(j) of A satisfies + u(j)**H * A = lambda(j) * u(j)**H + where u(j)**H denotes the conjugate transpose of u(j). + + The computed eigenvectors are normalized to have Euclidean norm + equal to 1 and largest component real. + + Arguments + ========= + + JOBVL (input) CHARACTER*1 + = 'N': left eigenvectors of A are not computed; + = 'V': left eigenvectors of A are computed. 
+ + JOBVR (input) CHARACTER*1 + = 'N': right eigenvectors of A are not computed; + = 'V': right eigenvectors of A are computed. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the N-by-N matrix A. + On exit, A has been overwritten. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + WR (output) REAL array, dimension (N) + WI (output) REAL array, dimension (N) + WR and WI contain the real and imaginary parts, + respectively, of the computed eigenvalues. Complex + conjugate pairs of eigenvalues appear consecutively + with the eigenvalue having the positive imaginary part + first. + + VL (output) REAL array, dimension (LDVL,N) + If JOBVL = 'V', the left eigenvectors u(j) are stored one + after another in the columns of VL, in the same order + as their eigenvalues. + If JOBVL = 'N', VL is not referenced. + If the j-th eigenvalue is real, then u(j) = VL(:,j), + the j-th column of VL. + If the j-th and (j+1)-st eigenvalues form a complex + conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and + u(j+1) = VL(:,j) - i*VL(:,j+1). + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= 1; if + JOBVL = 'V', LDVL >= N. + + VR (output) REAL array, dimension (LDVR,N) + If JOBVR = 'V', the right eigenvectors v(j) are stored one + after another in the columns of VR, in the same order + as their eigenvalues. + If JOBVR = 'N', VR is not referenced. + If the j-th eigenvalue is real, then v(j) = VR(:,j), + the j-th column of VR. + If the j-th and (j+1)-st eigenvalues form a complex + conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and + v(j+1) = VR(:,j) - i*VR(:,j+1). + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= 1; if + JOBVR = 'V', LDVR >= N. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. 
LWORK >= max(1,3*N), and + if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good + performance, LWORK must generally be larger. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = i, the QR algorithm failed to compute all the + eigenvalues, and no eigenvectors have been computed; + elements i+1:N of WR and WI contain eigenvalues which + have converged. + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --wr; + --wi; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1; + wantvl = lsame_(jobvl, "V"); + wantvr = lsame_(jobvr, "V"); + if (! wantvl && ! lsame_(jobvl, "N")) { + *info = -1; + } else if (! wantvr && ! lsame_(jobvr, "N")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldvl < 1 || wantvl && *ldvl < *n) { + *info = -9; + } else if (*ldvr < 1 || wantvr && *ldvr < *n) { + *info = -11; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV. + HSWORK refers to the workspace preferred by SHSEQR, as + calculated below. HSWORK is computed assuming ILO=1 and IHI=N, + the worst case.) 
+*/ + + minwrk = 1; + if (*info == 0 && (*lwork >= 1 || lquery)) { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, n, & + c__0, (ftnlen)6, (ftnlen)1); + if (! wantvl && ! wantvr) { +/* Computing MAX */ + i__1 = 1, i__2 = *n * 3; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "SHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "SHSEQR", "EN", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + + hswork; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = 1, i__2 = *n << 2; + minwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "SOR" + "GHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = ilaenv_(&c__8, "SHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) + 6, (ftnlen)2); + maxb = max(i__1,2); +/* + Computing MIN + Computing MAX +*/ + i__3 = 2, i__4 = ilaenv_(&c__4, "SHSEQR", "SV", n, &c__1, n, & + c_n1, (ftnlen)6, (ftnlen)2); + i__1 = min(maxb,*n), i__2 = max(i__3,i__4); + k = min(i__1,i__2); +/* Computing MAX */ + i__1 = k * (k + 2), i__2 = *n << 1; + hswork = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + + hswork; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } + work[1] = (real) maxwrk; + } + if (*lwork < minwrk && ! 
lquery) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEEV ", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = slamch_("S"); + bignum = 1.f / smlnum; + slabad_(&smlnum, &bignum); + smlnum = sqrt(smlnum) / eps; + bignum = 1.f / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", n, n, &a[a_offset], lda, dum); + scalea = FALSE_; + if (anrm > 0.f && anrm < smlnum) { + scalea = TRUE_; + cscale = smlnum; + } else if (anrm > bignum) { + scalea = TRUE_; + cscale = bignum; + } + if (scalea) { + slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, & + ierr); + } + +/* + Balance the matrix + (Workspace: need N) +*/ + + ibal = 1; + sgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr); + +/* + Reduce to upper Hessenberg form + (Workspace: need 3*N, prefer 2*N+N*NB) +*/ + + itau = ibal + *n; + iwrk = itau + *n; + i__1 = *lwork - iwrk + 1; + sgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, + &ierr); + + if (wantvl) { + +/* + Want left eigenvectors + Copy Householder vectors to VL +*/ + + *(unsigned char *)side = 'L'; + slacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl) + ; + +/* + Generate orthogonal matrix in VL + (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +*/ + + i__1 = *lwork - iwrk + 1; + sorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VL + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vl[vl_offset], ldvl, &work[iwrk], &i__1, info); + + if (wantvr) { + +/* + Want left and right eigenvectors + Copy Schur vectors to VR +*/ + + *(unsigned char *)side = 'B'; + slacpy_("F", n, 
n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr); + } + + } else if (wantvr) { + +/* + Want right eigenvectors + Copy Householder vectors to VR +*/ + + *(unsigned char *)side = 'R'; + slacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr) + ; + +/* + Generate orthogonal matrix in VR + (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +*/ + + i__1 = *lwork - iwrk + 1; + sorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk], + &i__1, &ierr); + +/* + Perform QR iteration, accumulating Schur vectors in VR + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + + } else { + +/* + Compute eigenvalues only + (Workspace: need N+1, prefer N+HSWORK (see comments) ) +*/ + + iwrk = itau; + i__1 = *lwork - iwrk + 1; + shseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], & + vr[vr_offset], ldvr, &work[iwrk], &i__1, info); + } + +/* If INFO > 0 from SHSEQR, then quit */ + + if (*info > 0) { + goto L50; + } + + if (wantvl || wantvr) { + +/* + Compute left and/or right eigenvectors + (Workspace: need 4*N) +*/ + + strevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl, + &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr); + } + + if (wantvl) { + +/* + Undo balancing of left eigenvectors + (Workspace: need N) +*/ + + sgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl, + &ierr); + +/* Normalize left eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.f) { + scl = 1.f / snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + } else if (wi[i__] > 0.f) { + r__1 = snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1); + r__2 = snrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + scl = 1.f / slapy2_(&r__1, &r__2); + sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1); + 
sscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + r__1 = vl[k + i__ * vl_dim1]; +/* Computing 2nd power */ + r__2 = vl[k + (i__ + 1) * vl_dim1]; + work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2; +/* L10: */ + } + k = isamax_(n, &work[iwrk], &c__1); + slartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], + &cs, &sn, &r__); + srot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) * + vl_dim1 + 1], &c__1, &cs, &sn); + vl[k + (i__ + 1) * vl_dim1] = 0.f; + } +/* L20: */ + } + } + + if (wantvr) { + +/* + Undo balancing of right eigenvectors + (Workspace: need N) +*/ + + sgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr, + &ierr); + +/* Normalize right eigenvectors and make largest component real */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + if (wi[i__] == 0.f) { + scl = 1.f / snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + } else if (wi[i__] > 0.f) { + r__1 = snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1); + r__2 = snrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + scl = 1.f / slapy2_(&r__1, &r__2); + sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1); + sscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1); + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing 2nd power */ + r__1 = vr[k + i__ * vr_dim1]; +/* Computing 2nd power */ + r__2 = vr[k + (i__ + 1) * vr_dim1]; + work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2; +/* L30: */ + } + k = isamax_(n, &work[iwrk], &c__1); + slartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], + &cs, &sn, &r__); + srot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) * + vr_dim1 + 1], &c__1, &cs, &sn); + vr[k + (i__ + 1) * vr_dim1] = 0.f; + } +/* L40: */ + } + } + +/* Undo scaling if necessary */ + +L50: + if (scalea) { + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info + + 
1], &i__2, &ierr); + i__1 = *n - *info; +/* Computing MAX */ + i__3 = *n - *info; + i__2 = max(i__3,1); + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info + + 1], &i__2, &ierr); + if (*info > 0) { + i__1 = ilo - 1; + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1], + n, &ierr); + i__1 = ilo - 1; + slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1], + n, &ierr); + } + } + + work[1] = (real) maxwrk; + return 0; + +/* End of SGEEV */ + +} /* sgeev_ */ + +/* Subroutine */ int sgehd2_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static real aii; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SGEHD2 reduces a real general matrix A to upper Hessenberg form H by + an orthogonal similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to SGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= max(1,N). + + A (input/output) REAL array, dimension (LDA,N) + On entry, the n by n general matrix to be reduced. 
+ On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the orthogonal matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) REAL array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). 
+ + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEHD2", &i__1); + return 0; + } + + i__1 = *ihi - 1; + for (i__ = *ilo; i__ <= i__1; ++i__) { + +/* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */ + + i__2 = *ihi - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + aii = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Apply H(i) to A(1:ihi,i+1:ihi) from the right */ + + i__2 = *ihi - i__; + slarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]); + +/* Apply H(i) to A(i+1:ihi,i+1:n) from the left */ + + i__2 = *ihi - i__; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]); + + a[i__ + 1 + i__ * a_dim1] = aii; +/* L10: */ + } + + return 0; + +/* End of SGEHD2 */ + +} /* sgehd2_ */ + +/* Subroutine */ int sgehrd_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__; + static real t[4160] /* was [65][64] */; + static integer ib; + static real ei; + static integer nb, nh, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, 
+ real *, integer *), sgehd2_(integer *, integer *, + integer *, real *, integer *, real *, real *, integer *), slarfb_( + char *, char *, char *, char *, integer *, integer *, integer *, + real *, integer *, real *, integer *, real *, integer *, real *, + integer *), slahrd_(integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SGEHRD reduces a real general matrix A to upper Hessenberg form H by + an orthogonal similarity transformation: Q' * A * Q = H . + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that A is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to SGEBAL; otherwise they should be + set to 1 and N respectively. See Further Details. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the N-by-N general matrix to be reduced. + On exit, the upper triangle and the first subdiagonal of A + are overwritten with the upper Hessenberg matrix H, and the + elements below the first subdiagonal, with the array TAU, + represent the orthogonal matrix Q as a product of elementary + reflectors. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) REAL array, dimension (N-1) + The scalar factors of the elementary reflectors (see Further + Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to + zero. 
+ + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The length of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + The matrix Q is represented as a product of (ihi-ilo) elementary + reflectors + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on + exit in A(i+2:ihi,i), and tau in TAU(i). + + The contents of A are illustrated by the following example, with + n = 7, ilo = 2 and ihi = 6: + + on entry, on exit, + + ( a a a a a a a ) ( a a h h h h a ) + ( a a a a a a ) ( a h h h h a ) + ( a a a a a a ) ( h h h h h h ) + ( a a a a a a ) ( v2 h h h h h ) + ( a a a a a a ) ( v2 v3 h h h h ) + ( a a a a a a ) ( v2 v3 v4 h h h ) + ( a ) ( a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). 
+ + ===================================================================== + + + Test the input parameters +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; +/* Computing MIN */ + i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + lwkopt = *n * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*n < 0) { + *info = -1; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -2; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEHRD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + tau[i__] = 0.f; +/* L10: */ + } + i__1 = *n - 1; + for (i__ = max(1,*ihi); i__ <= i__1; ++i__) { + tau[i__] = 0.f; +/* L20: */ + } + +/* Quick return if possible */ + + nh = *ihi - *ilo + 1; + if (nh <= 1) { + work[1] = 1.f; + return 0; + } + +/* + Determine the block size. + + Computing MIN +*/ + i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); + nbmin = 2; + iws = 1; + if (nb > 1 && nb < nh) { + +/* + Determine when to cross over from blocked to unblocked code + (last block is always handled by unblocked code). + + Computing MAX +*/ + i__1 = nb, i__2 = ilaenv_(&c__3, "SGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < nh) { + +/* Determine if workspace is large enough for blocked code. */ + + iws = *n * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: determine the + minimum value of NB, and reduce NB or force use of + unblocked code. 
+ + Computing MAX +*/ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGEHRD", " ", n, ilo, ihi, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + if (*lwork >= *n * nbmin) { + nb = *lwork / *n; + } else { + nb = 1; + } + } + } + } + ldwork = *n; + + if (nb < nbmin || nb >= nh) { + +/* Use unblocked code below */ + + i__ = *ilo; + + } else { + +/* Use blocked code */ + + i__1 = *ihi - 1 - nx; + i__2 = nb; + for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *ihi - i__; + ib = min(i__3,i__4); + +/* + Reduce columns i:i+ib-1 to Hessenberg form, returning the + matrices V and T of the block reflector H = I - V*T*V' + which performs the reduction, and also the matrix Y = A*V*T +*/ + + slahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + c__65, &work[1], &ldwork); + +/* + Apply the block reflector H to A(1:ihi,i+ib:ihi) from the + right, computing A := A - Y * V'. V(i+ib,ib-1) must be set + to 1. +*/ + + ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; + a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.f; + i__3 = *ihi - i__ - ib + 1; + sgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b1150, & + work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, & + c_b871, &a[(i__ + ib) * a_dim1 + 1], lda); + a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; + +/* + Apply the block reflector H to A(i+1:ihi,i+ib:n) from the + left +*/ + + i__3 = *ihi - i__; + i__4 = *n - i__ - ib + 1; + slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ + i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); +/* L30: */ + } + } + +/* Use unblocked code to reduce the rest of the matrix */ + + sgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo); + work[1] = (real) iws; + + return 0; + +/* End of SGEHRD */ + +} /* sgehrd_ */ + +/* Subroutine */ int sgelq2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info) +{ + /* 
System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static real aii; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SGELQ2 computes an LQ factorization of a real m by n matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and below the diagonal of the array + contain the m by min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) REAL array, dimension (M) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k) . . . H(2) H(1), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), + and tau in TAU(i). 
+ + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELQ2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1] + , lda, &tau[i__]); + if (i__ < *m) { + +/* Apply H(i) to A(i+1:m,i:n) from the right */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + i__2 = *m - i__; + i__3 = *n - i__ + 1; + slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[ + i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of SGELQ2 */ + +} /* sgelq2_ */ + +/* Subroutine */ int sgelqf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int sgelq2_(integer *, integer *, real *, integer + *, real *, real *, integer *), slarfb_(char *, char *, char *, + char *, integer *, integer *, integer *, real *, integer *, real * + , integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + static 
integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SGELQF computes an LQ factorization of a real M-by-N matrix A: + A = L * Q. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and below the diagonal of the array + contain the m-by-min(m,n) lower trapezoidal matrix L (L is + lower triangular if m <= n); the elements above the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,M). + For optimum performance LWORK >= M*NB, where NB is the + optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(k) . . . H(2) H(1), where k = min(m,n). 
+ + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *m * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*m) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGELQF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *m; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "SGELQF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *m; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGELQF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the LQ factorization of the current block + A(i:i+ib-1,i:n) +*/ + + i__3 = *n - i__ + 1; + sgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *m) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *n - i__ + 1; + slarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H to A(i+ib:m,i:n) from the right */ + + i__3 = *m - i__ - ib + 1; + i__4 = *n - i__ + 1; + slarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3, + &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + sgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (real) iws; + return 0; + +/* End of SGELQF */ + +} /* sgelqf_ */ + +/* Subroutine */ int sgeqr2_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, k; + static real aii; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SGEQR2 computes a QR factorization of a real m by n matrix A: + A = Q * R. 
+ + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(m,n) by n upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of elementary reflectors (see Further Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). 
+ + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQR2", &i__1); + return 0; + } + + k = min(*m,*n); + + i__1 = k; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Generate elementary reflector H(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1] + , &c__1, &tau[i__]); + if (i__ < *n) { + +/* Apply H(i) to A(i:m,i+1:n) from the left */ + + aii = a[i__ + i__ * a_dim1]; + a[i__ + i__ * a_dim1] = 1.f; + i__2 = *m - i__ + 1; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[ + i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + a[i__ + i__ * a_dim1] = aii; + } +/* L10: */ + } + return 0; + +/* End of SGEQR2 */ + +} /* sgeqr2_ */ + +/* Subroutine */ int sgeqrf_(integer *m, integer *n, real *a, integer *lda, + real *tau, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, k, ib, nb, nx, iws, nbmin, iinfo; + extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer + *, real *, real *, integer *), slarfb_(char *, char *, char *, + char *, integer *, integer *, integer *, real *, integer *, real * + , integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); + 
static integer ldwork, lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SGEQRF computes a QR factorization of a real M-by-N matrix A: + A = Q * R. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, the elements on and above the diagonal of the array + contain the min(M,N)-by-N upper trapezoidal matrix R (R is + upper triangular if m >= n); the elements below the diagonal, + with the array TAU, represent the orthogonal matrix Q as a + product of min(m,n) elementary reflectors (see Further + Details). + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + TAU (output) REAL array, dimension (min(M,N)) + The scalar factors of the elementary reflectors (see Further + Details). + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + For optimum performance LWORK >= N*NB, where NB is + the optimal blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The matrix Q is represented as a product of elementary reflectors + + Q = H(1) H(2) . . . H(k), where k = min(m,n). 
+ + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), + and tau in TAU(i). + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + --work; + + /* Function Body */ + *info = 0; + nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + lwkopt = *n * nb; + work[1] = (real) lwkopt; + lquery = *lwork == -1; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } else if (*lwork < max(1,*n) && ! lquery) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGEQRF", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + k = min(*m,*n); + if (k == 0) { + work[1] = 1.f; + return 0; + } + + nbmin = 2; + nx = 0; + iws = *n; + if (nb > 1 && nb < k) { + +/* + Determine when to cross over from blocked to unblocked code. + + Computing MAX +*/ + i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, ( + ftnlen)6, (ftnlen)1); + nx = max(i__1,i__2); + if (nx < k) { + +/* Determine if workspace is large enough for blocked code. */ + + ldwork = *n; + iws = ldwork * nb; + if (*lwork < iws) { + +/* + Not enough workspace to use optimal NB: reduce NB and + determine the minimum value of NB. +*/ + + nb = *lwork / ldwork; +/* Computing MAX */ + i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, & + c_n1, (ftnlen)6, (ftnlen)1); + nbmin = max(i__1,i__2); + } + } + } + + if (nb >= nbmin && nb < k && nx < k) { + +/* Use blocked code initially */ + + i__1 = k - nx; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = k - i__ + 1; + ib = min(i__3,nb); + +/* + Compute the QR factorization of the current block + A(i:m,i:i+ib-1) +*/ + + i__3 = *m - i__ + 1; + sgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[ + 1], &iinfo); + if (i__ + ib <= *n) { + +/* + Form the triangular factor of the block reflector + H = H(i) H(i+1) . . . H(i+ib-1) +*/ + + i__3 = *m - i__ + 1; + slarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ * + a_dim1], lda, &tau[i__], &work[1], &ldwork); + +/* Apply H' to A(i:m,i+ib:n) from the left */ + + i__3 = *m - i__ + 1; + i__4 = *n - i__ - ib + 1; + slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & + i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & + ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib + + 1], &ldwork); + } +/* L10: */ + } + } else { + i__ = 1; + } + +/* Use unblocked code to factor the last or only block. */ + + if (i__ <= k) { + i__2 = *m - i__ + 1; + i__1 = *n - i__ + 1; + sgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1] + , &iinfo); + } + + work[1] = (real) iws; + return 0; + +/* End of SGEQRF */ + +} /* sgeqrf_ */ + +/* Subroutine */ int sgesdd_(char *jobz, integer *m, integer *n, real *a, + integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt, + real *work, integer *lwork, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, + i__2, i__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, ie, il, ir, iu, blk; + static real dum[1], eps; + static integer ivt, iscl; + static real anrm; + static integer idum[1], ierr, itau; + extern logical lsame_(char *, char *); + static integer chunk; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static 
integer minmn, wrkbl, itaup, itauq, mnthr; + static logical wntqa; + static integer nwork; + static logical wntqn, wntqo, wntqs; + static integer bdspac; + extern /* Subroutine */ int sbdsdc_(char *, char *, integer *, real *, + real *, real *, integer *, real *, integer *, real *, integer *, + real *, integer *, integer *), sgebrd_(integer *, + integer *, real *, integer *, real *, real *, real *, real *, + real *, integer *, integer *); + extern doublereal slamch_(char *), slange_(char *, integer *, + integer *, real *, integer *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static real bignum; + extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer + *, real *, real *, integer *, integer *), slascl_(char *, integer + *, integer *, real *, real *, integer *, integer *, real *, + integer *, integer *), sgeqrf_(integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), slacpy_(char + *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *), sorgbr_(char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, integer * + ); + static integer ldwrkl; + extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , real *, integer *, integer *); + static integer ldwrkr, minwrk, ldwrku, maxwrk; + extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + static integer ldwkvt; + static real smlnum; + static logical wntqas; + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + static logical lquery; + + +/* + -- LAPACK driver routine (version 3.0) -- + 
Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + SGESDD computes the singular value decomposition (SVD) of a real + M-by-N matrix A, optionally computing the left and right singular + vectors. If singular vectors are desired, it uses a + divide-and-conquer algorithm. + + The SVD is written + + A = U * SIGMA * transpose(V) + + where SIGMA is an M-by-N matrix which is zero except for its + min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and + V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA + are the singular values of A; they are real and non-negative, and + are returned in descending order. The first min(m,n) columns of + U and V are the left and right singular vectors of A. + + Note that the routine returns VT = V**T, not V. + + The divide and conquer algorithm makes very mild assumptions about + floating point arithmetic. It will work on machines with a guard + digit in add/subtract, or on those binary machines without guard + digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or + Cray-2. It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + JOBZ (input) CHARACTER*1 + Specifies options for computing all or part of the matrix U: + = 'A': all M columns of U and all N rows of V**T are + returned in the arrays U and VT; + = 'S': the first min(M,N) columns of U and the first + min(M,N) rows of V**T are returned in the arrays U + and VT; + = 'O': If M >= N, the first N columns of U are overwritten + on the array A and all rows of V**T are returned in + the array VT; + otherwise, all columns of U are returned in the + array U and the first M rows of V**T are overwritten + in the array VT; + = 'N': no columns of U or rows of V**T are computed. + + M (input) INTEGER + The number of rows of the input matrix A. M >= 0. 
+ + N (input) INTEGER + The number of columns of the input matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N matrix A. + On exit, + if JOBZ = 'O', A is overwritten with the first N columns + of U (the left singular vectors, stored + columnwise) if M >= N; + A is overwritten with the first M rows + of V**T (the right singular vectors, stored + rowwise) otherwise. + if JOBZ .ne. 'O', the contents of A are destroyed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + S (output) REAL array, dimension (min(M,N)) + The singular values of A, sorted so that S(i) >= S(i+1). + + U (output) REAL array, dimension (LDU,UCOL) + UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; + UCOL = min(M,N) if JOBZ = 'S'. + If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M + orthogonal matrix U; + if JOBZ = 'S', U contains the first min(M,N) columns of U + (the left singular vectors, stored columnwise); + if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. + + LDU (input) INTEGER + The leading dimension of the array U. LDU >= 1; if + JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. + + VT (output) REAL array, dimension (LDVT,N) + If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the + N-by-N orthogonal matrix V**T; + if JOBZ = 'S', VT contains the first min(M,N) rows of + V**T (the right singular vectors, stored rowwise); + if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= 1; if + JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; + if JOBZ = 'S', LDVT >= min(M,N). + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK; + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= 1. + If JOBZ = 'N', + LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)). 
+ If JOBZ = 'O', + LWORK >= 3*min(M,N)*min(M,N) + + max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). + If JOBZ = 'S' or 'A' + LWORK >= 3*min(M,N)*min(M,N) + + max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). + For good performance, LWORK should generally be larger. + If LWORK < 0 but other input arguments are legal, WORK(1) + returns the optimal LWORK. + + IWORK (workspace) INTEGER array, dimension (8*min(M,N)) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: SBDSDC did not converge, updating process failed. + + Further Details + =============== + + Based on contributions by + Ming Gu and Huan Ren, Computer Science Division, University of + California at Berkeley, USA + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --s; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + vt_dim1 = *ldvt; + vt_offset = 1 + vt_dim1; + vt -= vt_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + minmn = min(*m,*n); + mnthr = (integer) (minmn * 11.f / 6.f); + wntqa = lsame_(jobz, "A"); + wntqs = lsame_(jobz, "S"); + wntqas = wntqa || wntqs; + wntqo = lsame_(jobz, "O"); + wntqn = lsame_(jobz, "N"); + minwrk = 1; + maxwrk = 1; + lquery = *lwork == -1; + + if (! 
(wntqa || wntqs || wntqo || wntqn)) { + *info = -1; + } else if (*m < 0) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*m)) { + *info = -5; + } else if (*ldu < 1 || wntqas && *ldu < *m || wntqo && *m < *n && *ldu < * + m) { + *info = -8; + } else if (*ldvt < 1 || wntqa && *ldvt < *n || wntqs && *ldvt < minmn || + wntqo && *m >= *n && *ldvt < *n) { + *info = -10; + } + +/* + Compute workspace + (Note: Comments in the code beginning "Workspace:" describe the + minimal amount of workspace needed at that point in the code, + as well as the preferred amount for good performance. + NB refers to the optimal block size for the immediately + following subroutine, as returned by ILAENV.) +*/ + + if (*info == 0 && *m > 0 && *n > 0) { + if (*m >= *n) { + +/* Compute space needed for SBDSDC */ + + if (wntqn) { + bdspac = *n * 7; + } else { + bdspac = *n * 3 * *n + (*n << 2); + } + if (*m >= mnthr) { + if (wntqn) { + +/* Path 1 (M much larger than N, JOBZ='N') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *n; + } else if (wntqo) { + +/* Path 2 (M much larger than N, JOBZ='O') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + 
wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*n << 1) * *n; + minwrk = bdspac + (*n << 1) * *n + *n * 3; + } else if (wntqs) { + +/* Path 3 (M much larger than N, JOBZ='S') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR", + " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } else if (wntqa) { + +/* Path 4 (M much larger than N, JOBZ='A') */ + + wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "SORGQR", + " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + (*n << 1) * ilaenv_(&c__1, + "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = 
*n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *n * *n; + minwrk = bdspac + *n * *n + *n * 3; + } + } else { + +/* Path 5 (M at least N, but not much larger) */ + + wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wntqn) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *m, i__2 = *n * *n + bdspac; + minwrk = *n * 3 + max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *n * 3; + maxwrk = max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = maxwrk, i__2 = bdspac + *n * 3; + maxwrk 
= max(i__1,i__2); + minwrk = *n * 3 + max(*m,bdspac); + } + } + } else { + +/* Compute space needed for SBDSDC */ + + if (wntqn) { + bdspac = *m * 7; + } else { + bdspac = *m * 3 * *m + (*m << 2); + } + if (*n >= mnthr) { + if (wntqn) { + +/* Path 1t (N much larger than M, JOBZ='N') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m; + maxwrk = max(i__1,i__2); + minwrk = bdspac + *m; + } else if (wntqo) { + +/* Path 2t (N much larger than M, JOBZ='O') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "SORGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + (*m << 1) * *m; + minwrk = bdspac + (*m << 1) * *m + *m * 3; + } else if (wntqs) { + +/* Path 3t (N much larger than M, JOBZ='S') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "SORGLQ", + " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = 
*m * 3 + (*m << 1) * ilaenv_(&c__1, + "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } else if (wntqa) { + +/* Path 4t (N much larger than M, JOBZ='A') */ + + wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "SORGLQ", + " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + (*m << 1) * ilaenv_(&c__1, + "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *m; + minwrk = bdspac + *m * *m + *m * 3; + } + } else { + +/* Path 5t (N greater than M, but not much larger) */ + + wrkbl = *m * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m, + n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (wntqn) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqo) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, n, 
&c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + wrkbl = max(i__1,i__2); + maxwrk = wrkbl + *m * *n; +/* Computing MAX */ + i__1 = *n, i__2 = *m * *m + bdspac; + minwrk = *m * 3 + max(i__1,i__2); + } else if (wntqs) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } else if (wntqa) { +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR" + , "PRT", n, n, m, &c_n1, (ftnlen)6, (ftnlen)3); + wrkbl = max(i__1,i__2); +/* Computing MAX */ + i__1 = wrkbl, i__2 = bdspac + *m * 3; + maxwrk = max(i__1,i__2); + minwrk = *m * 3 + max(*n,bdspac); + } + } + } + work[1] = (real) maxwrk; + } + + if (*lwork < minwrk && ! 
lquery) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGESDD", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + if (*lwork >= 1) { + work[1] = 1.f; + } + return 0; + } + +/* Get machine constants */ + + eps = slamch_("P"); + smlnum = sqrt(slamch_("S")) / eps; + bignum = 1.f / smlnum; + +/* Scale A if max element outside range [SMLNUM,BIGNUM] */ + + anrm = slange_("M", m, n, &a[a_offset], lda, dum); + iscl = 0; + if (anrm > 0.f && anrm < smlnum) { + iscl = 1; + slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, & + ierr); + } else if (anrm > bignum) { + iscl = 1; + slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, & + ierr); + } + + if (*m >= *n) { + +/* + A has at least as many rows as columns. If A has sufficiently + more rows than columns, first reduce using the QR + decomposition (if sufficient workspace available) +*/ + + if (*m >= mnthr) { + + if (wntqn) { + +/* + Path 1 (M much larger than N, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need 2*N, prefer N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out below R */ + + i__1 = *n - 1; + i__2 = *n - 1; + slaset_("L", &i__1, &i__2, &c_b1101, &c_b1101, &a[a_dim1 + 2], + lda); + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (Workspace: need 4*N, prefer 3*N+2*N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *n; + +/* + Perform bidiagonal SVD, computing singular values only + (Workspace: need N+BDSPAC) +*/ + + sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* + 
Path 2 (M much larger than N, JOBZ = 'O') + N left singular vectors to be overwritten on A and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is LDWRKR by N */ + + if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) { + ldwrkr = *lda; + } else { + ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n; + } + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__1 = *n - 1; + i__2 = *n - 1; + slaset_("L", &i__1, &i__2, &c_b1101, &c_b1101, &work[ir + 1], + &ldwrkr); + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in VT, copying result to WORK(IR) + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* WORK(IU) is N by N */ + + iu = nwork; + nwork = iu + *n * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite WORK(IU) by left singular vectors of R + and VT by right singular vectors of R + (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &work[iu], n, &work[nwork], 
&i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IU), storing result in WORK(IR) and copying to A + (Workspace: need 2*N*N, prefer N*N+M*N) +*/ + + i__1 = *m; + i__2 = ldwrkr; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + sgemm_("N", "N", &chunk, n, n, &c_b871, &a[i__ + a_dim1], + lda, &work[iu], n, &c_b1101, &work[ir], &ldwrkr); + slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L10: */ + } + + } else if (wntqs) { + +/* + Path 3 (M much larger than N, JOBZ='S') + N left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + ir = 1; + +/* WORK(IR) is N by N */ + + ldwrkr = *n; + itau = ir + ldwrkr * *n; + nwork = itau + *n; + +/* + Compute A=Q*R + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy R to WORK(IR), zeroing out below it */ + + slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); + i__2 = *n - 1; + i__1 = *n - 1; + slaset_("L", &i__2, &i__1, &c_b1101, &c_b1101, &work[ir + 1], + &ldwrkr); + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in WORK(IR) + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagoal matrix in U and computing right 
singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of R and VT + by right singular vectors of R + (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in A by left singular vectors of R in + WORK(IR), storing result in U + (Workspace: need N*N) +*/ + + slacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); + sgemm_("N", "N", m, n, n, &c_b871, &a[a_offset], lda, &work[ + ir], &ldwrkr, &c_b1101, &u[u_offset], ldu); + + } else if (wntqa) { + +/* + Path 4 (M much larger than N, JOBZ='A') + M left singular vectors to be computed in U and + N right singular vectors to be computed in VT +*/ + + iu = 1; + +/* WORK(IU) is N by N */ + + ldwrku = *n; + itau = iu + ldwrku * *n; + nwork = itau + *n; + +/* + Compute A=Q*R, copying result to U + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu); + +/* + Generate Q in U + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + i__2 = *lwork - nwork + 1; + sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork], + &i__2, &ierr); + +/* Produce R in A, zeroing out other entries */ + + i__2 = *n - 1; + i__1 = *n - 1; + slaset_("L", &i__2, &i__1, &c_b1101, &c_b1101, &a[a_dim1 + 2], + lda); + ie = itau; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize R in A + (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +*/ 
+ + i__2 = *lwork - nwork + 1; + sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite WORK(IU) by left singular vectors of R and VT + by right singular vectors of R + (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply Q in U by left singular vectors of R in + WORK(IU), storing result in A + (Workspace: need N*N) +*/ + + sgemm_("N", "N", m, n, n, &c_b871, &u[u_offset], ldu, &work[ + iu], &ldwrku, &c_b1101, &a[a_offset], lda); + +/* Copy left singular vectors of A from A to U */ + + slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu); + + } + + } else { + +/* + M .LT. 
MNTHR + + Path 5 (M at least N, but not much larger) + Reduce to bidiagonal form without QR decomposition +*/ + + ie = 1; + itauq = ie + *n; + itaup = itauq + *n; + nwork = itaup + *n; + +/* + Bidiagonalize A + (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Perform bidiagonal SVD, only computing singular values + (Workspace: need N+BDSPAC) +*/ + + sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + iu = nwork; + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* WORK( IU ) is M by N */ + + ldwrku = *m; + nwork = iu + ldwrku * *n; + slaset_("F", m, n, &c_b1101, &c_b1101, &work[iu], &ldwrku); + } else { + +/* WORK( IU ) is N by N */ + + ldwrku = *n; + nwork = iu + ldwrku * *n; + +/* WORK(IR) is LDWRKR by N */ + + ir = nwork; + ldwrkr = (*lwork - *n * *n - *n * 3) / *n; + } + nwork = iu + ldwrku * *n; + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in WORK(IU) and computing right + singular vectors of bidiagonal matrix in VT + (Workspace: need N+N*N+BDSPAC) +*/ + + sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, & + vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[ + 1], info); + +/* + Overwrite VT by right singular vectors of A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + + if (*lwork >= *m * *n + *n * 3 + bdspac) { + +/* + Overwrite WORK(IU) by left singular vectors of A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &work[iu], &ldwrku, &work[nwork], &i__2, & + ierr); + +/* Copy left singular vectors of A 
from WORK(IU) to A */ + + slacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda); + } else { + +/* + Generate Q in A + (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by left singular vectors of + bidiagonal matrix in WORK(IU), storing result in + WORK(IR) and copying to A + (Workspace: need 2*N*N, prefer N*N+M*N) +*/ + + i__2 = *m; + i__1 = ldwrkr; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *m - i__ + 1; + chunk = min(i__3,ldwrkr); + sgemm_("N", "N", &chunk, n, n, &c_b871, &a[i__ + + a_dim1], lda, &work[iu], &ldwrku, &c_b1101, & + work[ir], &ldwrkr); + slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + + a_dim1], lda); +/* L20: */ + } + } + + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + slaset_("F", m, n, &c_b1101, &c_b1101, &u[u_offset], ldu); + sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 3*N, prefer 2*N+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need N+BDSPAC) +*/ + + slaset_("F", m, m, &c_b1101, &c_b1101, &u[u_offset], ldu); + sbdsdc_("U", "I", n, 
&s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of U to identity matrix */ + + i__1 = *m - *n; + i__2 = *m - *n; + slaset_("F", &i__1, &i__2, &c_b1101, &c_b871, &u[*n + 1 + (*n + + 1) * u_dim1], ldu); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } else { + +/* + A has more columns than rows. If A has sufficiently more + columns than rows, first reduce using the LQ decomposition (if + sufficient workspace available) +*/ + + if (*n >= mnthr) { + + if (wntqn) { + +/* + Path 1t (N much larger than M, JOBZ='N') + No singular vectors to be computed +*/ + + itau = 1; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need 2*M, prefer M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Zero out above L */ + + i__1 = *m - 1; + i__2 = *m - 1; + slaset_("U", &i__1, &i__2, &c_b1101, &c_b1101, &a[(a_dim1 << + 1) + 1], lda); + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (Workspace: need 4*M, prefer 3*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + nwork = ie + *m; + +/* + Perform bidiagonal SVD, computing singular values only + (Workspace: need M+BDSPAC) +*/ + + sbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + + } else if (wntqo) { + +/* + Path 2t (N much larger than M, 
JOBZ='O') + M right singular vectors to be overwritten on A and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* IVT is M by M */ + + il = ivt + *m * *m; + if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) { + +/* WORK(IL) is M by N */ + + ldwrkl = *m; + chunk = *n; + } else { + ldwrkl = *m; + chunk = (*lwork - *m * *m) / *m; + } + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__1, &ierr); + +/* Copy L to WORK(IL), zeroing about above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__1 = *m - 1; + i__2 = *m - 1; + slaset_("U", &i__1, &i__2, &c_b1101, &c_b1101, &work[il + + ldwrkl], &ldwrkl); + +/* + Generate Q in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__1, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IL) + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__1, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U, and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M+M*M+BDSPAC) +*/ + + sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], m, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of L and WORK(IVT) + by right singular vectors of L + (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", 
"T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &work[ivt], m, &work[nwork], &i__1, &ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by Q + in A, storing result in WORK(IL) and copying to A + (Workspace: need 2*M*M, prefer M*M+M*N) +*/ + + i__1 = *n; + i__2 = chunk; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += + i__2) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + sgemm_("N", "N", m, &blk, m, &c_b871, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b1101, &work[il], & + ldwrkl); + slacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + + 1], lda); +/* L30: */ + } + + } else if (wntqs) { + +/* + Path 3t (N much larger than M, JOBZ='S') + M right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + il = 1; + +/* WORK(IL) is M by M */ + + ldwrkl = *m; + itau = il + ldwrkl * *m; + nwork = itau + *m; + +/* + Compute A=L*Q + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + +/* Copy L to WORK(IL), zeroing out above it */ + + slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); + i__2 = *m - 1; + i__1 = *m - 1; + slaset_("U", &i__2, &i__1, &c_b1101, &c_b1101, &work[il + + ldwrkl], &ldwrkl); + +/* + Generate Q in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork], + &i__2, &ierr); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in WORK(IU), copying result to U + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal 
matrix in VT + (Workspace: need M+BDSPAC) +*/ + + sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of L and VT + by right singular vectors of L + (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IL) by + Q in A, storing result in VT + (Workspace: need M*M) +*/ + + slacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); + sgemm_("N", "N", m, n, m, &c_b871, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b1101, &vt[vt_offset], ldvt); + + } else if (wntqa) { + +/* + Path 4t (N much larger than M, JOBZ='A') + N right singular vectors to be computed in VT and + M left singular vectors to be computed in U +*/ + + ivt = 1; + +/* WORK(IVT) is M by M */ + + ldwkvt = *m; + itau = ivt + ldwkvt * *m; + nwork = itau + *m; + +/* + Compute A=L*Q, copying result to VT + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], & + i__2, &ierr); + slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + +/* + Generate Q in VT + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[ + nwork], &i__2, &ierr); + +/* Produce L in A, zeroing out other entries */ + + i__2 = *m - 1; + i__1 = *m - 1; + slaset_("U", &i__2, &i__1, &c_b1101, &c_b1101, &a[(a_dim1 << + 1) + 1], lda); + ie = itau; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize L in A + (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +*/ + + 
i__2 = *lwork - nwork + 1; + sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[ + itauq], &work[itaup], &work[nwork], &i__2, &ierr); + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M+M*M+BDSPAC) +*/ + + sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* + Overwrite U by left singular vectors of L and WORK(IVT) + by right singular vectors of L + (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, & + ierr); + +/* + Multiply right singular vectors of L in WORK(IVT) by + Q in VT, storing result in A + (Workspace: need M*M) +*/ + + sgemm_("N", "N", m, n, m, &c_b871, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b1101, &a[a_offset], lda); + +/* Copy right singular vectors of A from A to VT */ + + slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt); + + } + + } else { + +/* + N .LT. 
MNTHR + + Path 5t (N greater than M, but not much larger) + Reduce to bidiagonal form without LQ decomposition +*/ + + ie = 1; + itauq = ie + *m; + itaup = itauq + *m; + nwork = itaup + *m; + +/* + Bidiagonalize A + (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +*/ + + i__2 = *lwork - nwork + 1; + sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], & + work[itaup], &work[nwork], &i__2, &ierr); + if (wntqn) { + +/* + Perform bidiagonal SVD, only computing singular values + (Workspace: need M+BDSPAC) +*/ + + sbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1, + dum, idum, &work[nwork], &iwork[1], info); + } else if (wntqo) { + ldwkvt = *m; + ivt = nwork; + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* WORK( IVT ) is M by N */ + + slaset_("F", m, n, &c_b1101, &c_b1101, &work[ivt], & + ldwkvt); + nwork = ivt + ldwkvt * *n; + } else { + +/* WORK( IVT ) is M by M */ + + nwork = ivt + ldwkvt * *m; + il = nwork; + +/* WORK(IL) is M by CHUNK */ + + chunk = (*lwork - *m * *m - *m * 3) / *m; + } + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in WORK(IVT) + (Workspace: need M*M+BDSPAC) +*/ + + sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, & + work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1] + , info); + +/* + Overwrite U by left singular vectors of A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr); + + if (*lwork >= *m * *n + *m * 3 + bdspac) { + +/* + Overwrite WORK(IVT) by left singular vectors of A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, + &ierr); + +/* Copy right singular vectors of A from WORK(IVT) to A */ + + slacpy_("F", m, 
n, &work[ivt], &ldwkvt, &a[a_offset], lda); + } else { + +/* + Generate P**T in A + (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +*/ + + i__2 = *lwork - nwork + 1; + sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], & + work[nwork], &i__2, &ierr); + +/* + Multiply Q in A by right singular vectors of + bidiagonal matrix in WORK(IVT), storing result in + WORK(IL) and copying to A + (Workspace: need 2*M*M, prefer M*M+M*N) +*/ + + i__2 = *n; + i__1 = chunk; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += + i__1) { +/* Computing MIN */ + i__3 = *n - i__ + 1; + blk = min(i__3,chunk); + sgemm_("N", "N", m, &blk, m, &c_b871, &work[ivt], & + ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b1101, & + work[il], m); + slacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 + + 1], lda); +/* L40: */ + } + } + } else if (wntqs) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + slaset_("F", m, n, &c_b1101, &c_b1101, &vt[vt_offset], ldvt); + sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ + vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 3*M, prefer 2*M+M*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } else if (wntqa) { + +/* + Perform bidiagonal SVD, computing left singular vectors + of bidiagonal matrix in U and computing right singular + vectors of bidiagonal matrix in VT + (Workspace: need M+BDSPAC) +*/ + + slaset_("F", n, n, &c_b1101, &c_b1101, &vt[vt_offset], ldvt); + sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[ 
+ vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1], + info); + +/* Set the right corner of VT to identity matrix */ + + i__1 = *n - *m; + i__2 = *n - *m; + slaset_("F", &i__1, &i__2, &c_b1101, &c_b871, &vt[*m + 1 + (* + m + 1) * vt_dim1], ldvt); + +/* + Overwrite U by left singular vectors of A and VT + by right singular vectors of A + (Workspace: need 2*M+N, prefer 2*M+N*NB) +*/ + + i__1 = *lwork - nwork + 1; + sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ + itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); + i__1 = *lwork - nwork + 1; + sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[ + itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, & + ierr); + } + + } + + } + +/* Undo scaling if necessary */ + + if (iscl == 1) { + if (anrm > bignum) { + slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + if (anrm < smlnum) { + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & + minmn, &ierr); + } + } + +/* Return optimal workspace in WORK(1) */ + + work[1] = (real) maxwrk; + + return 0; + +/* End of SGESDD */ + +} /* sgesdd_ */ + +/* Subroutine */ int sgesv_(integer *n, integer *nrhs, real *a, integer *lda, + integer *ipiv, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern /* Subroutine */ int xerbla_(char *, integer *), sgetrf_( + integer *, integer *, real *, integer *, integer *, integer *), + sgetrs_(char *, integer *, integer *, real *, integer *, integer * + , real *, integer *, integer *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SGESV computes the solution to a real system of linear equations + A * X = B, + where A is an N-by-N matrix and X and B are N-by-NRHS matrices. 
+ + The LU decomposition with partial pivoting and row interchanges is + used to factor A as + A = P * L * U, + where P is a permutation matrix, L is unit lower triangular, and U is + upper triangular. The factored form of A is then used to solve the + system of equations A * X = B. + + Arguments + ========= + + N (input) INTEGER + The number of linear equations, i.e., the order of the + matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the N-by-N coefficient matrix A. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + IPIV (output) INTEGER array, dimension (N) + The pivot indices that define the permutation matrix P; + row i of the matrix was interchanged with row IPIV(i). + + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the N-by-NRHS matrix of right hand side matrix B. + On exit, if INFO = 0, the N-by-NRHS solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, so the solution could not be computed. + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (*n < 0) { + *info = -1; + } else if (*nrhs < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGESV ", &i__1); + return 0; + } + +/* Compute the LU factorization of A. */ + + sgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + sgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[ + b_offset], ldb, info); + } + return 0; + +/* End of SGESV */ + +} /* sgesv_ */ + +/* Subroutine */ int sgetf2_(integer *m, integer *n, real *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + + /* Local variables */ + static integer j, jp; + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *), sscal_(integer * + , real *, real *, integer *), sswap_(integer *, real *, integer *, + real *, integer *), xerbla_(char *, integer *); + extern integer isamax_(integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1992 + + + Purpose + ======= + + SGETF2 computes an LU factorization of a general m-by-n matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 2 BLAS version of the algorithm. 
+ + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, U(k,k) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + + i__1 = min(*m,*n); + for (j = 1; j <= i__1; ++j) { + +/* Find pivot and test for singularity. */ + + i__2 = *m - j + 1; + jp = j - 1 + isamax_(&i__2, &a[j + j * a_dim1], &c__1); + ipiv[j] = jp; + if (a[jp + j * a_dim1] != 0.f) { + +/* Apply the interchange to columns 1:N. */ + + if (jp != j) { + sswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda); + } + +/* Compute elements J+1:M of J-th column. 
*/ + + if (j < *m) { + i__2 = *m - j; + r__1 = 1.f / a[j + j * a_dim1]; + sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + } + + } else if (*info == 0) { + + *info = j; + } + + if (j < min(*m,*n)) { + +/* Update trailing submatrix. */ + + i__2 = *m - j; + i__3 = *n - j; + sger_(&i__2, &i__3, &c_b1150, &a[j + 1 + j * a_dim1], &c__1, &a[j + + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], + lda); + } +/* L10: */ + } + return 0; + +/* End of SGETF2 */ + +} /* sgetf2_ */ + +/* Subroutine */ int sgetrf_(integer *m, integer *n, real *a, integer *lda, + integer *ipiv, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + + /* Local variables */ + static integer i__, j, jb, nb, iinfo; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), strsm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, + integer *), sgetf2_(integer *, + integer *, real *, integer *, integer *, integer *), xerbla_(char + *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer + *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SGETRF computes an LU factorization of a general M-by-N matrix A + using partial pivoting with row interchanges. + + The factorization has the form + A = P * L * U + where P is a permutation matrix, L is lower triangular with unit + diagonal elements (lower trapezoidal if m > n), and U is upper + triangular (upper trapezoidal if m < n). + + This is the right-looking Level 3 BLAS version of the algorithm. 
+ + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the M-by-N matrix to be factored. + On exit, the factors L and U from the factorization + A = P*L*U; the unit diagonal elements of L are not stored. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + IPIV (output) INTEGER array, dimension (min(M,N)) + The pivot indices; for 1 <= i <= min(M,N), row i of the + matrix was interchanged with row IPIV(i). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, U(i,i) is exactly zero. The factorization + has been completed, but the factor U is exactly + singular, and division by zero will occur if it is used + to solve a system of equations. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + + /* Function Body */ + *info = 0; + if (*m < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*m)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "SGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen) + 1); + if (nb <= 1 || nb >= min(*m,*n)) { + +/* Use unblocked code. */ + + sgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); + } else { + +/* Use blocked code. */ + + i__1 = min(*m,*n); + i__2 = nb; + for (j = 1; i__2 < 0 ? 
j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__3 = min(*m,*n) - j + 1; + jb = min(i__3,nb); + +/* + Factor diagonal and subdiagonal blocks and test for exact + singularity. +*/ + + i__3 = *m - j + 1; + sgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); + +/* Adjust INFO and the pivot indices. */ + + if (*info == 0 && iinfo > 0) { + *info = iinfo + j - 1; + } +/* Computing MIN */ + i__4 = *m, i__5 = j + jb - 1; + i__3 = min(i__4,i__5); + for (i__ = j; i__ <= i__3; ++i__) { + ipiv[i__] = j - 1 + ipiv[i__]; +/* L10: */ + } + +/* Apply interchanges to columns 1:J-1. */ + + i__3 = j - 1; + i__4 = j + jb - 1; + slaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); + + if (j + jb <= *n) { + +/* Apply interchanges to columns J+JB:N. */ + + i__3 = *n - j - jb + 1; + i__4 = j + jb - 1; + slaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & + ipiv[1], &c__1); + +/* Compute block row of U. */ + + i__3 = *n - j - jb + 1; + strsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & + c_b871, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + a_dim1], lda); + if (j + jb <= *m) { + +/* Update trailing submatrix. 
*/ + + i__3 = *m - j - jb + 1; + i__4 = *n - j - jb + 1; + sgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, + &c_b1150, &a[j + jb + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda, &c_b871, &a[j + jb + (j + + jb) * a_dim1], lda); + } + } +/* L20: */ + } + } + return 0; + +/* End of SGETRF */ + +} /* sgetrf_ */ + +/* Subroutine */ int sgetrs_(char *trans, integer *n, integer *nrhs, real *a, + integer *lda, integer *ipiv, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *); + static logical notran; + extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer + *, integer *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SGETRS solves a system of linear equations + A * X = B or A' * X = B + with a general N-by-N matrix A using the LU factorization computed + by SGETRF. + + Arguments + ========= + + TRANS (input) CHARACTER*1 + Specifies the form of the system of equations: + = 'N': A * X = B (No transpose) + = 'T': A'* X = B (Transpose) + = 'C': A'* X = B (Conjugate transpose = Transpose) + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) REAL array, dimension (LDA,N) + The factors L and U from the factorization A = P*L*U + as computed by SGETRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). 
+ + IPIV (input) INTEGER array, dimension (N) + The pivot indices from SGETRF; for 1<=i<=N, row i of the + matrix was interchanged with row IPIV(i). + + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --ipiv; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + notran = lsame_(trans, "N"); + if (! notran && ! lsame_(trans, "T") && ! lsame_( + trans, "C")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SGETRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (notran) { + +/* + Solve A * X = B. + + Apply row interchanges to the right hand sides. +*/ + + slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); + +/* Solve L*X = B, overwriting B with X. */ + + strsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b871, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b871, + &a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A' * X = B. + + Solve U'*X = B, overwriting B with X. +*/ + + strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b871, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. 
*/ + + strsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b871, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Apply row interchanges to the solution vectors. */ + + slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); + } + + return 0; + +/* End of SGETRS */ + +} /* sgetrs_ */ + +/* Subroutine */ int shseqr_(char *job, char *compz, integer *n, integer *ilo, + integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__, + integer *ldz, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4, + i__5; + real r__1, r__2; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i__, j, k, l; + static real s[225] /* was [15][15] */, v[16]; + static integer i1, i2, ii, nh, nr, ns, nv; + static real vv[16]; + static integer itn; + static real tau; + static integer its; + static real ulp, tst1; + static integer maxb; + static real absw; + static integer ierr; + static real unfl, temp, ovfl; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static integer itemp; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + static logical initz, wantt; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static logical wantz; + extern doublereal slapy2_(real *, real *); + extern /* Subroutine */ int slabad_(real *, real *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *); + extern integer isamax_(integer 
*, real *, integer *); + extern doublereal slanhs_(char *, integer *, real *, integer *, real *); + extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , integer *, real *, integer *, integer *), slacpy_(char *, + integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *), slarfx_(char *, integer *, integer *, + real *, real *, real *, integer *, real *); + static real smlnum; + static logical lquery; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SHSEQR computes the eigenvalues of a real upper Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur + form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal matrix Q, + so that this routine can give the Schur factorization of a matrix A + which has been reduced to the Hessenberg form H by the orthogonal + matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an orthogonal matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. 
ILO and IHI are normally + set by a previous call to SGEBAL, and then passed to SGEHRD + when the matrix output by SGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. + 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. + + H (input/output) REAL array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if JOB = 'S', H contains the upper quasi-triangular + matrix T from the Schur decomposition (the Schur form); + 2-by-2 diagonal blocks (corresponding to complex conjugate + pairs of eigenvalues) are returned in standard form, with + H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E', + the contents of H are unspecified on exit. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + WR (output) REAL array, dimension (N) + WI (output) REAL array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues. If two eigenvalues are computed as a complex + conjugate pair, they are stored in consecutive elements of + WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and + WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the + same order as on the diagonal of the Schur form returned in + H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 + diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + Z (input/output) REAL array, dimension (LDZ,N) + If COMPZ = 'N': Z is not referenced. + If COMPZ = 'I': on entry, Z need not be set, and on exit, Z + contains the orthogonal matrix Z of the Schur vectors of H. + If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, + which is assumed to be equal to the unit matrix except for + the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. + Normally Q is the orthogonal matrix generated by SORGHR after + the call to SGEHRD which formed the Hessenberg matrix H. + + LDZ (input) INTEGER + The leading dimension of the array Z. 
+ LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK >= max(1,N). + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, SHSEQR failed to compute all of the + eigenvalues in a total of 30*(IHI-ILO+1) iterations; + elements 1:ilo-1 and i+1:n of WR and WI contain those + eigenvalues which have been successfully computed. + + ===================================================================== + + + Decode and test the input parameters +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + wantt = lsame_(job, "S"); + initz = lsame_(compz, "I"); + wantz = initz || lsame_(compz, "V"); + + *info = 0; + work[1] = (real) max(1,*n); + lquery = *lwork == -1; + if (! lsame_(job, "E") && ! wantt) { + *info = -1; + } else if (! lsame_(compz, "N") && ! wantz) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ilo < 1 || *ilo > max(1,*n)) { + *info = -4; + } else if (*ihi < min(*ilo,*n) || *ihi > *n) { + *info = -5; + } else if (*ldh < max(1,*n)) { + *info = -7; + } else if (*ldz < 1 || wantz && *ldz < max(1,*n)) { + *info = -11; + } else if (*lwork < max(1,*n) && ! 
lquery) { + *info = -13; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SHSEQR", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Initialize Z, if necessary */ + + if (initz) { + slaset_("Full", n, n, &c_b1101, &c_b871, &z__[z_offset], ldz); + } + +/* Store the eigenvalues isolated by SGEBAL. */ + + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.f; +/* L10: */ + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.f; +/* L20: */ + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.f; + return 0; + } + +/* + Set rows and columns ILO to IHI to zero below the first + subdiagonal. +*/ + + i__1 = *ihi - 2; + for (j = *ilo; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j + 2; i__ <= i__2; ++i__) { + h__[i__ + j * h_dim1] = 0.f; +/* L30: */ + } +/* L40: */ + } + nh = *ihi - *ilo + 1; + +/* + Determine the order of the multi-shift QR algorithm to be used. + + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = job; + i__3[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + ns = ilaenv_(&c__4, "SHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = job; + i__3[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + maxb = ilaenv_(&c__8, "SHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (ns <= 2 || ns > nh || maxb >= nh) { + +/* Use the standard double-shift algorithm */ + + slahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ + 1], ilo, ihi, &z__[z_offset], ldz, info); + return 0; + } + maxb = max(3,maxb); +/* Computing MIN */ + i__1 = min(ns,maxb); + ns = min(i__1,15); + +/* + Now 2 < NS <= MAXB < NH. + + Set machine-dependent constants for the stopping criterion. 
+ If norm(H) <= sqrt(OVFL), overflow should not occur. +*/ + + unfl = slamch_("Safe minimum"); + ovfl = 1.f / unfl; + slabad_(&unfl, &ovfl); + ulp = slamch_("Precision"); + smlnum = unfl * (nh / ulp); + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (wantt) { + i1 = 1; + i2 = *n; + } + +/* ITN is the total number of multiple-shift QR iterations allowed. */ + + itn = nh * 30; + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of at most MAXB. Each iteration of the loop + works with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L50: + l = *ilo; + if (i__ < *ilo) { + goto L170; + } + +/* + Perform multiple-shift QR iterations on rows and columns ILO to I + until a submatrix of order at most MAXB splits off at the bottom + because a subdiagonal element has become negligible. +*/ + + i__1 = itn; + for (its = 0; its <= i__1; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 + = h__[k + k * h_dim1], dabs(r__2)); + if (tst1 == 0.f) { + i__4 = i__ - l + 1; + tst1 = slanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1] + ); + } +/* Computing MAX */ + r__2 = ulp * tst1; + if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2, + smlnum)) { + goto L70; + } +/* L60: */ + } +L70: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible. */ + + h__[l + (l - 1) * h_dim1] = 0.f; + } + +/* Exit from loop if a submatrix of order <= MAXB has split off. */ + + if (l >= i__ - maxb + 1) { + goto L160; + } + +/* + Now the active submatrix is in rows and columns L to I. 
If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! wantt) { + i1 = l; + i2 = i__; + } + + if (its == 20 || its == 30) { + +/* Exceptional shifts. */ + + i__2 = i__; + for (ii = i__ - ns + 1; ii <= i__2; ++ii) { + wr[ii] = ((r__1 = h__[ii + (ii - 1) * h_dim1], dabs(r__1)) + ( + r__2 = h__[ii + ii * h_dim1], dabs(r__2))) * 1.5f; + wi[ii] = 0.f; +/* L80: */ + } + } else { + +/* Use eigenvalues of trailing submatrix of order NS as shifts. */ + + slacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * + h_dim1], ldh, s, &c__15); + slahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ - + ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset], + ldz, &ierr); + if (ierr > 0) { + +/* + If SLAHQR failed to compute all NS eigenvalues, use the + unconverged diagonal elements as the remaining shifts. +*/ + + i__2 = ierr; + for (ii = 1; ii <= i__2; ++ii) { + wr[i__ - ns + ii] = s[ii + ii * 15 - 16]; + wi[i__ - ns + ii] = 0.f; +/* L90: */ + } + } + } + +/* + Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) + where G is the Hessenberg submatrix H(L:I,L:I) and w is + the vector of shifts (stored in WR and WI). The result is + stored in the local array V. 
+*/ + + v[0] = 1.f; + i__2 = ns + 1; + for (ii = 2; ii <= i__2; ++ii) { + v[ii - 1] = 0.f; +/* L100: */ + } + nv = 1; + i__2 = i__; + for (j = i__ - ns + 1; j <= i__2; ++j) { + if (wi[j] >= 0.f) { + if (wi[j] == 0.f) { + +/* real shift */ + + i__4 = nv + 1; + scopy_(&i__4, v, &c__1, vv, &c__1); + i__4 = nv + 1; + r__1 = -wr[j]; + sgemv_("No transpose", &i__4, &nv, &c_b871, &h__[l + l * + h_dim1], ldh, vv, &c__1, &r__1, v, &c__1); + ++nv; + } else if (wi[j] > 0.f) { + +/* complex conjugate pair of shifts */ + + i__4 = nv + 1; + scopy_(&i__4, v, &c__1, vv, &c__1); + i__4 = nv + 1; + r__1 = wr[j] * -2.f; + sgemv_("No transpose", &i__4, &nv, &c_b871, &h__[l + l * + h_dim1], ldh, v, &c__1, &r__1, vv, &c__1); + i__4 = nv + 1; + itemp = isamax_(&i__4, vv, &c__1); +/* Computing MAX */ + r__2 = (r__1 = vv[itemp - 1], dabs(r__1)); + temp = 1.f / dmax(r__2,smlnum); + i__4 = nv + 1; + sscal_(&i__4, &temp, vv, &c__1); + absw = slapy2_(&wr[j], &wi[j]); + temp = temp * absw * absw; + i__4 = nv + 2; + i__5 = nv + 1; + sgemv_("No transpose", &i__4, &i__5, &c_b871, &h__[l + l * + h_dim1], ldh, vv, &c__1, &temp, v, &c__1); + nv += 2; + } + +/* + Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, + reset it to the unit vector. +*/ + + itemp = isamax_(&nv, v, &c__1); + temp = (r__1 = v[itemp - 1], dabs(r__1)); + if (temp == 0.f) { + v[0] = 1.f; + i__4 = nv; + for (ii = 2; ii <= i__4; ++ii) { + v[ii - 1] = 0.f; +/* L110: */ + } + } else { + temp = dmax(temp,smlnum); + r__1 = 1.f / temp; + sscal_(&nv, &r__1, v, &c__1); + } + } +/* L120: */ + } + +/* Multiple-shift QR step */ + + i__2 = i__ - 1; + for (k = l; k <= i__2; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. 
+ + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. + + Computing MIN +*/ + i__4 = ns + 1, i__5 = i__ - k + 1; + nr = min(i__4,i__5); + if (k > l) { + scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + slarfg_(&nr, v, &v[1], &c__1, &tau); + if (k > l) { + h__[k + (k - 1) * h_dim1] = v[0]; + i__4 = i__; + for (ii = k + 1; ii <= i__4; ++ii) { + h__[ii + (k - 1) * h_dim1] = 0.f; +/* L130: */ + } + } + v[0] = 1.f; + +/* + Apply G from the left to transform the rows of the matrix in + columns K to I2. +*/ + + i__4 = i2 - k + 1; + slarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, & + work[1]); + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+NR,I). + + Computing MIN +*/ + i__5 = k + nr; + i__4 = min(i__5,i__) - i1 + 1; + slarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, + &work[1]); + + if (wantz) { + +/* Accumulate transformations in the matrix Z */ + + slarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], + ldz, &work[1]); + } +/* L140: */ + } + +/* L150: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L160: + +/* + A submatrix of order <= MAXB in rows and columns L to I has split + off. Use the double-shift QR algorithm to handle it. +*/ + + slahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1], + ilo, ihi, &z__[z_offset], ldz, info); + if (*info > 0) { + return 0; + } + +/* + Decrement number of remaining iterations, and return to start of + the main loop with a new value of I. 
+*/ + + itn -= its; + i__ = l - 1; + goto L50; + +L170: + work[1] = (real) max(1,*n); + return 0; + +/* End of SHSEQR */ + +} /* shseqr_ */ + +/* Subroutine */ int slabad_(real *small, real *large) +{ + /* Builtin functions */ + double r_lg10(real *), sqrt(doublereal); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLABAD takes as input the values computed by SLAMCH for underflow and + overflow, and returns the square root of each of these values if the + log of LARGE is sufficiently large. This subroutine is intended to + identify machines with a large exponent range, such as the Crays, and + redefine the underflow and overflow limits to be the square roots of + the values computed by SLAMCH. This subroutine is needed because + SLAMCH does not compensate for poor arithmetic in the upper half of + the exponent range, as is found on a Cray. + + Arguments + ========= + + SMALL (input/output) REAL + On entry, the underflow threshold as computed by SLAMCH. + On exit, if LOG10(LARGE) is sufficiently large, the square + root of SMALL, otherwise unchanged. + + LARGE (input/output) REAL + On entry, the overflow threshold as computed by SLAMCH. + On exit, if LOG10(LARGE) is sufficiently large, the square + root of LARGE, otherwise unchanged. + + ===================================================================== + + + If it looks like we're on a Cray, take the square root of + SMALL and LARGE to avoid overflow and underflow problems. 
+*/ + + if (r_lg10(large) > 2e3f) { + *small = sqrt(*small); + *large = sqrt(*large); + } + + return 0; + +/* End of SLABAD */ + +} /* slabad_ */ + +/* Subroutine */ int slabrd_(integer *m, integer *n, integer *nb, real *a, + integer *lda, real *d__, real *e, real *tauq, real *taup, real *x, + integer *ldx, real *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *), slarfg_( + integer *, real *, real *, integer *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLABRD reduces the first NB rows and columns of a real general + m by n matrix A to upper or lower bidiagonal form by an orthogonal + transformation Q' * A * P, and returns the matrices X and Y which + are needed to apply the transformation to the unreduced part of A. + + If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower + bidiagonal form. + + This is an auxiliary routine called by SGEBRD + + Arguments + ========= + + M (input) INTEGER + The number of rows in the matrix A. + + N (input) INTEGER + The number of columns in the matrix A. + + NB (input) INTEGER + The number of leading rows and columns of A to be reduced. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the m by n general matrix to be reduced. + On exit, the first NB rows and columns of the matrix are + overwritten; the rest of the array is unchanged. 
+ If m >= n, elements on and below the diagonal in the first NB + columns, with the array TAUQ, represent the orthogonal + matrix Q as a product of elementary reflectors; and + elements above the diagonal in the first NB rows, with the + array TAUP, represent the orthogonal matrix P as a product + of elementary reflectors. + If m < n, elements below the diagonal in the first NB + columns, with the array TAUQ, represent the orthogonal + matrix Q as a product of elementary reflectors, and + elements on and above the diagonal in the first NB rows, + with the array TAUP, represent the orthogonal matrix P as + a product of elementary reflectors. + See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + D (output) REAL array, dimension (NB) + The diagonal elements of the first NB rows and columns of + the reduced matrix. D(i) = A(i,i). + + E (output) REAL array, dimension (NB) + The off-diagonal elements of the first NB rows and columns of + the reduced matrix. + + TAUQ (output) REAL array dimension (NB) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix Q. See Further Details. + + TAUP (output) REAL array, dimension (NB) + The scalar factors of the elementary reflectors which + represent the orthogonal matrix P. See Further Details. + + X (output) REAL array, dimension (LDX,NB) + The m-by-nb matrix X required to update the unreduced part + of A. + + LDX (input) INTEGER + The leading dimension of the array X. LDX >= M. + + Y (output) REAL array, dimension (LDY,NB) + The n-by-nb matrix Y required to update the unreduced part + of A. + + LDY (output) INTEGER + The leading dimension of the array Y. LDY >= N. + + Further Details + =============== + + The matrices Q and P are represented as products of elementary + reflectors: + + Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . 
G(nb) + + Each H(i) and G(i) has the form: + + H(i) = I - tauq * v * v' and G(i) = I - taup * u * u' + + where tauq and taup are real scalars, and v and u are real vectors. + + If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in + A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in + A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in + A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). + + The elements of the vectors v and u together form the m-by-nb matrix + V and the nb-by-n matrix U' which are needed, with X and Y, to apply + the transformation to the unreduced part of the matrix, using a block + update of the form: A := A - V*Y' - X*U'. + + The contents of A on exit are illustrated by the following examples + with nb = 2: + + m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): + + ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) + ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) + ( v1 v2 a a a ) ( v1 1 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) ( v1 v2 a a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix which is unchanged, + vi denotes an element of the vector defining H(i), and ui an element + of the vector defining G(i). 
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --d__; + --e; + --tauq; + --taup; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (*m >= *n) { + +/* Reduce to upper bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i:m,i) */ + + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[i__ + a_dim1], + lda, &y[i__ + y_dim1], ldy, &c_b871, &a[i__ + i__ * + a_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &x[i__ + x_dim1], + ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b871, &a[i__ + i__ * + a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+1:m,i) */ + + i__2 = *m - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * + a_dim1], &c__1, &tauq[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + a[i__ + i__ * a_dim1] = 1.f; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__ + 1; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[i__ + (i__ + 1) + * a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, & + c_b1101, &y[i__ + 1 + i__ * y_dim1], &c__1) + ; + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[i__ + a_dim1], + lda, &a[i__ + i__ * a_dim1], &c__1, &c_b1101, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b871, & + y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b871, &x[i__ + x_dim1], + ldx, &a[i__ + i__ * a_dim1], &c__1, 
&c_b1101, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b1150, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b871, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + +/* Update A(i,i+1:n) */ + + i__2 = *n - i__; + sgemv_("No transpose", &i__2, &i__, &c_b1150, &y[i__ + 1 + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b871, &a[i__ + + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b1150, &a[(i__ + 1) * + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b871, &a[ + i__ + (i__ + 1) * a_dim1], lda); + +/* Generate reflection P(i) to annihilate A(i,i+2:n) */ + + i__2 = *n - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min( + i__3,*n) * a_dim1], lda, &taup[i__]); + e[i__] = a[i__ + (i__ + 1) * a_dim1]; + a[i__ + (i__ + 1) * a_dim1] = 1.f; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + sgemv_("No transpose", &i__2, &i__3, &c_b871, &a[i__ + 1 + ( + i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], + lda, &c_b1101, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__; + sgemv_("Transpose", &i__2, &i__, &c_b871, &y[i__ + 1 + y_dim1] + , ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &c_b1101, & + x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + sgemv_("No transpose", &i__2, &i__, &c_b1150, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b871, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("No transpose", &i__2, &i__3, &c_b871, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + c_b1101, &x[i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b871, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + 
sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + } +/* L10: */ + } + } else { + +/* Reduce to lower bidiagonal form */ + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + +/* Update A(i,i:n) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &y[i__ + y_dim1], + ldy, &a[i__ + a_dim1], lda, &c_b871, &a[i__ + i__ * + a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + sgemv_("Transpose", &i__2, &i__3, &c_b1150, &a[i__ * a_dim1 + 1], + lda, &x[i__ + x_dim1], ldx, &c_b871, &a[i__ + i__ * + a_dim1], lda); + +/* Generate reflection P(i) to annihilate A(i,i+1:n) */ + + i__2 = *n - i__ + 1; +/* Computing MIN */ + i__3 = i__ + 1; + slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * + a_dim1], lda, &taup[i__]); + d__[i__] = a[i__ + i__ * a_dim1]; + if (i__ < *m) { + a[i__ + i__ * a_dim1] = 1.f; + +/* Compute X(i+1:m,i) */ + + i__2 = *m - i__; + i__3 = *n - i__ + 1; + sgemv_("No transpose", &i__2, &i__3, &c_b871, &a[i__ + 1 + + i__ * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, & + c_b1101, &x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b871, &y[i__ + y_dim1], + ldy, &a[i__ + i__ * a_dim1], lda, &c_b1101, &x[i__ * + x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[i__ + 1 + + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b871, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + sgemv_("No transpose", &i__2, &i__3, &c_b871, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b1101, &x[ + i__ * x_dim1 + 1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &x[i__ + 1 + + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b871, & + x[i__ + 1 + i__ * x_dim1], &c__1); + i__2 = *m - i__; + sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); + +/* Update A(i+1:m,i) */ + + i__2 = *m - i__; + 
i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[i__ + 1 + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b871, &a[i__ + + 1 + i__ * a_dim1], &c__1); + i__2 = *m - i__; + sgemv_("No transpose", &i__2, &i__, &c_b1150, &x[i__ + 1 + + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b871, & + a[i__ + 1 + i__ * a_dim1], &c__1); + +/* Generate reflection Q(i) to annihilate A(i+2:m,i) */ + + i__2 = *m - i__; +/* Computing MIN */ + i__3 = i__ + 2; + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) + + i__ * a_dim1], &c__1, &tauq[i__]); + e[i__] = a[i__ + 1 + i__ * a_dim1]; + a[i__ + 1 + i__ * a_dim1] = 1.f; + +/* Compute Y(i+1:n,i) */ + + i__2 = *m - i__; + i__3 = *n - i__; + sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[i__ + 1 + (i__ + + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], & + c__1, &c_b1101, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[i__ + 1 + + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b1101, &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &y[i__ + 1 + + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b871, & + y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *m - i__; + sgemv_("Transpose", &i__2, &i__, &c_b871, &x[i__ + 1 + x_dim1] + , ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b1101, & + y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - i__; + sgemv_("Transpose", &i__, &i__2, &c_b1150, &a[(i__ + 1) * + a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & + c_b871, &y[i__ + 1 + i__ * y_dim1], &c__1); + i__2 = *n - i__; + sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); + } +/* L20: */ + } + } + return 0; + +/* End of SLABRD */ + +} /* slabrd_ */ + +/* Subroutine */ int slacpy_(char *uplo, integer *m, integer *n, real *a, + integer *lda, real *b, integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; + + /* Local variables */ + static 
integer i__, j; + extern logical lsame_(char *, char *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLACPY copies all or part of a two-dimensional matrix A to another + matrix B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies the part of the matrix A to be copied to B. + = 'U': Upper triangular part + = 'L': Lower triangular part + Otherwise: All of the matrix A + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. + + A (input) REAL array, dimension (LDA,N) + The m by n matrix A. If UPLO = 'U', only the upper triangle + or trapezoid is accessed; if UPLO = 'L', only the lower + triangle or trapezoid is accessed. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + B (output) REAL array, dimension (LDB,N) + On exit, B = A in the locations specified by UPLO. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,M). 
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = min(j,*m); + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(uplo, "L")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L30: */ + } +/* L40: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = a[i__ + j * a_dim1]; +/* L50: */ + } +/* L60: */ + } + } + return 0; + +/* End of SLACPY */ + +} /* slacpy_ */ + +/* Subroutine */ int sladiv_(real *a, real *b, real *c__, real *d__, real *p, + real *q) +{ + static real e, f; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLADIV performs complex division in real arithmetic + + a + i*b + p + i*q = --------- + c + i*d + + The algorithm is due to Robert L. Smith and can be found + in D. Knuth, The art of Computer Programming, Vol.2, p.195 + + Arguments + ========= + + A (input) REAL + B (input) REAL + C (input) REAL + D (input) REAL + The scalars a, b, c, and d in the above expression. + + P (output) REAL + Q (output) REAL + The scalars p and q in the above expression. 
+ + ===================================================================== +*/ + + + if (dabs(*d__) < dabs(*c__)) { + e = *d__ / *c__; + f = *c__ + *d__ * e; + *p = (*a + *b * e) / f; + *q = (*b - *a * e) / f; + } else { + e = *c__ / *d__; + f = *d__ + *c__ * e; + *p = (*b + *a * e) / f; + *q = (-(*a) + *b * e) / f; + } + + return 0; + +/* End of SLADIV */ + +} /* sladiv_ */ + +/* Subroutine */ int slae2_(real *a, real *b, real *c__, real *rt1, real *rt2) +{ + /* System generated locals */ + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real ab, df, tb, sm, rt, adf, acmn, acmx; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix + [ A B ] + [ B C ]. + On return, RT1 is the eigenvalue of larger absolute value, and RT2 + is the eigenvalue of smaller absolute value. + + Arguments + ========= + + A (input) REAL + The (1,1) element of the 2-by-2 matrix. + + B (input) REAL + The (1,2) and (2,1) elements of the 2-by-2 matrix. + + C (input) REAL + The (2,2) element of the 2-by-2 matrix. + + RT1 (output) REAL + The eigenvalue of larger absolute value. + + RT2 (output) REAL + The eigenvalue of smaller absolute value. + + Further Details + =============== + + RT1 is accurate to a few ulps barring over/underflow. + + RT2 may be inaccurate if there is massive cancellation in the + determinant A*C-B*B; higher precision or correctly rounded or + correctly truncated arithmetic would be needed to compute RT2 + accurately in all cases. + + Overflow is possible only if RT1 is within a factor of 5 of overflow. + Underflow is harmless if the input data is 0 or exceeds + underflow_threshold / macheps. 
+ + ===================================================================== + + + Compute the eigenvalues +*/ + + sm = *a + *c__; + df = *a - *c__; + adf = dabs(df); + tb = *b + *b; + ab = dabs(tb); + if (dabs(*a) > dabs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + r__1 = ab / adf; + rt = adf * sqrt(r__1 * r__1 + 1.f); + } else if (adf < ab) { +/* Computing 2nd power */ + r__1 = adf / ab; + rt = ab * sqrt(r__1 * r__1 + 1.f); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.f); + } + if (sm < 0.f) { + *rt1 = (sm - rt) * .5f; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.f) { + *rt1 = (sm + rt) * .5f; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5f; + *rt2 = rt * -.5f; + } + return 0; + +/* End of SLAE2 */ + +} /* slae2_ */ + +/* Subroutine */ int slaed0_(integer *icompq, integer *qsiz, integer *n, real + *d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs, + real *work, integer *iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2; + real r__1; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2; + static real temp; + static integer curr; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer iperm, indxq, iwrem; + extern /* Subroutine */ int scopy_(integer *, real *, integer 
*, real *, + integer *); + static integer iqptr, tlvls; + extern /* Subroutine */ int slaed1_(integer *, real *, real *, integer *, + integer *, real *, integer *, real *, integer *, integer *), + slaed7_(integer *, integer *, integer *, integer *, integer *, + integer *, real *, real *, integer *, integer *, real *, integer * + , real *, integer *, integer *, integer *, integer *, integer *, + real *, real *, integer *, integer *); + static integer igivcl; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer igivnm, submat; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *); + static integer curprb, subpbs, igivpt, curlvl, matsiz, iprmpt, smlsiz; + extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLAED0 computes all eigenvalues and corresponding eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + = 2: Compute eigenvalues and eigenvectors of tridiagonal + matrix. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the main diagonal of the tridiagonal matrix. 
+ On exit, its eigenvalues. + + E (input) REAL array, dimension (N-1) + The off-diagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Q (input/output) REAL array, dimension (LDQ, N) + On entry, Q must contain an N-by-N orthogonal matrix. + If ICOMPQ = 0 Q is not referenced. + If ICOMPQ = 1 On entry, Q is a subset of the columns of the + orthogonal matrix used to reduce the full + matrix to tridiagonal form corresponding to + the subset of the full matrix which is being + decomposed at this time. + If ICOMPQ = 2 On entry, Q will be the identity matrix. + On exit, Q contains the eigenvectors of the + tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. If eigenvectors are + desired, then LDQ >= max(1,N). In any case, LDQ >= 1. + + QSTORE (workspace) REAL array, dimension (LDQS, N) + Referenced only when ICOMPQ = 1. Used to store parts of + the eigenvector matrix when the updating matrix multiplies + take place. + + LDQS (input) INTEGER + The leading dimension of the array QSTORE. If ICOMPQ = 1, + then LDQS >= max(1,N). In any case, LDQS >= 1. + + WORK (workspace) REAL array, + If ICOMPQ = 0 or 1, the dimension of WORK must be at least + 1 + 3*N + 2*N*lg N + 2*N**2 + ( lg( N ) = smallest integer k + such that 2^k >= N ) + If ICOMPQ = 2, the dimension of WORK must be at least + 4*N + N**2. + + IWORK (workspace) INTEGER array, + If ICOMPQ = 0 or 1, the dimension of IWORK must be at least + 6 + 6*N + 5*N*lg N. + ( lg( N ) = smallest integer k + such that 2^k >= N ) + If ICOMPQ = 2, the dimension of IWORK must be at least + 3 + 5*N. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). 
+ + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + --e; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + qstore_dim1 = *ldqs; + qstore_offset = 1 + qstore_dim1; + qstore -= qstore_offset; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 2) { + *info = -1; + } else if (*icompq == 1 && *qsiz < max(0,*n)) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*ldqs < max(1,*n)) { + *info = -9; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED0", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + smlsiz = ilaenv_(&c__9, "SLAED0", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + Determine the size and placement of the submatrices, and save in + the leading elements of IWORK. +*/ + + iwork[1] = *n; + subpbs = 1; + tlvls = 0; +L10: + if (iwork[subpbs] > smlsiz) { + for (j = subpbs; j >= 1; --j) { + iwork[j * 2] = (iwork[j] + 1) / 2; + iwork[(j << 1) - 1] = iwork[j] / 2; +/* L20: */ + } + ++tlvls; + subpbs <<= 1; + goto L10; + } + i__1 = subpbs; + for (j = 2; j <= i__1; ++j) { + iwork[j] += iwork[j - 1]; +/* L30: */ + } + +/* + Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 + using rank-1 modifications (cuts). 
+*/ + + spm1 = subpbs - 1; + i__1 = spm1; + for (i__ = 1; i__ <= i__1; ++i__) { + submat = iwork[i__] + 1; + smm1 = submat - 1; + d__[smm1] -= (r__1 = e[smm1], dabs(r__1)); + d__[submat] -= (r__1 = e[smm1], dabs(r__1)); +/* L40: */ + } + + indxq = (*n << 2) + 3; + if (*icompq != 2) { + +/* + Set up workspaces for eigenvalues only/accumulate new vectors + routine +*/ + + temp = log((real) (*n)) / log(2.f); + lgn = (integer) temp; + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + iprmpt = indxq + *n + 1; + iperm = iprmpt + *n * lgn; + iqptr = iperm + *n * lgn; + igivpt = iqptr + *n + 2; + igivcl = igivpt + *n * lgn; + + igivnm = 1; + iq = igivnm + (*n << 1) * lgn; +/* Computing 2nd power */ + i__1 = *n; + iwrem = iq + i__1 * i__1 + 1; + +/* Initialize pointers */ + + i__1 = subpbs; + for (i__ = 0; i__ <= i__1; ++i__) { + iwork[iprmpt + i__] = 1; + iwork[igivpt + i__] = 1; +/* L50: */ + } + iwork[iqptr] = 1; + } + +/* + Solve each submatrix eigenproblem at the bottom of the divide and + conquer tree. 
+*/ + + curr = 0; + i__1 = spm1; + for (i__ = 0; i__ <= i__1; ++i__) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[1]; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 1] - iwork[i__]; + } + if (*icompq == 2) { + ssteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat + + submat * q_dim1], ldq, &work[1], info); + if (*info != 0) { + goto L130; + } + } else { + ssteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 + + iwork[iqptr + curr]], &matsiz, &work[1], info); + if (*info != 0) { + goto L130; + } + if (*icompq == 1) { + sgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b871, &q[submat * + q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]], + &matsiz, &c_b1101, &qstore[submat * qstore_dim1 + 1], + ldqs); + } +/* Computing 2nd power */ + i__2 = matsiz; + iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2; + ++curr; + } + k = 1; + i__2 = iwork[i__ + 1]; + for (j = submat; j <= i__2; ++j) { + iwork[indxq + j] = k; + ++k; +/* L60: */ + } +/* L70: */ + } + +/* + Successively merge eigensystems of adjacent submatrices + into eigensystem for the corresponding larger matrix. + + while ( SUBPBS > 1 ) +*/ + + curlvl = 1; +L80: + if (subpbs > 1) { + spm2 = subpbs - 2; + i__1 = spm2; + for (i__ = 0; i__ <= i__1; i__ += 2) { + if (i__ == 0) { + submat = 1; + matsiz = iwork[2]; + msd2 = iwork[1]; + curprb = 0; + } else { + submat = iwork[i__] + 1; + matsiz = iwork[i__ + 2] - iwork[i__]; + msd2 = matsiz / 2; + ++curprb; + } + +/* + Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) + into an eigensystem of size MATSIZ. + SLAED1 is used only for the full eigensystem of a tridiagonal + matrix. + SLAED7 handles the cases in which eigenvalues only or eigenvalues + and eigenvectors of a full symmetric matrix (which was reduced to + tridiagonal form) are desired. 
+*/ + + if (*icompq == 2) { + slaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1], + ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], & + msd2, &work[1], &iwork[subpbs + 1], info); + } else { + slaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[ + submat], &qstore[submat * qstore_dim1 + 1], ldqs, & + iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, & + work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm] + , &iwork[igivpt], &iwork[igivcl], &work[igivnm], & + work[iwrem], &iwork[subpbs + 1], info); + } + if (*info != 0) { + goto L130; + } + iwork[i__ / 2 + 1] = iwork[i__ + 2]; +/* L90: */ + } + subpbs /= 2; + ++curlvl; + goto L80; + } + +/* + end while + + Re-merge the eigenvalues/vectors which were deflated at the final + merge step. +*/ + + if (*icompq == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + scopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + + 1], &c__1); +/* L100: */ + } + scopy_(n, &work[1], &c__1, &d__[1], &c__1); + } else if (*icompq == 2) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; + scopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1); +/* L110: */ + } + scopy_(n, &work[1], &c__1, &d__[1], &c__1); + slacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + j = iwork[indxq + i__]; + work[i__] = d__[j]; +/* L120: */ + } + scopy_(n, &work[1], &c__1, &d__[1], &c__1); + } + goto L140; + +L130: + *info = submat * (*n + 1) + submat + matsiz - 1; + +L140: + return 0; + +/* End of SLAED0 */ + +} /* slaed0_ */ + +/* Subroutine */ int slaed1_(integer *n, real *d__, real *q, integer *ldq, + integer *indxq, real *rho, integer *cutpnt, real *work, integer * + iwork, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Local variables */ + static integer i__, k, n1, n2, is, iw, iz, iq2, 
cpp1, indx, indxc, indxp; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slaed2_(integer *, integer *, integer *, real *, real + *, integer *, integer *, real *, real *, real *, real *, real *, + integer *, integer *, integer *, integer *, integer *), slaed3_( + integer *, integer *, integer *, real *, real *, integer *, real * + , real *, real *, integer *, integer *, real *, real *, integer *) + ; + static integer idlmda; + extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( + integer *, integer *, real *, integer *, integer *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLAED1 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and eigenvectors of a tridiagonal matrix. SLAED7 handles + the case in which eigenvalues only or eigenvalues and eigenvectors + of a full symmetric matrix (which was reduced to tridiagonal form) + are desired. + + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine SLAED2. + + The second stage consists of calculating the updated + eigenvalues. 
This is done by finding the roots of the secular + equation via the routine SLAED4 (as called by SLAED3). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) REAL array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) REAL array, dimension (LDQ,N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input/output) INTEGER array, dimension (N) + On entry, the permutation which separately sorts the two + subproblems in D into ascending order. + On exit, the permutation which will reintegrate the + subproblems back into sorted order, + i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. + + RHO (input) REAL + The subdiagonal entry used to create the rank-1 modification. + + CUTPNT (input) INTEGER + The location of the last eigenvalue in the leading sub-matrix. + min(1,N) <= CUTPNT <= N/2. + + WORK (workspace) REAL array, dimension (4*N + N**2) + + IWORK (workspace) INTEGER array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. 
+ + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } else if (*ldq < max(1,*n)) { + *info = -4; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *cutpnt || *n / 2 < *cutpnt) { + *info = -7; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED1", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are integer pointers which indicate + the portion of the workspace + used by a particular array in SLAED2 and SLAED3. +*/ + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + scopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1); + cpp1 = *cutpnt + 1; + i__1 = *n - *cutpnt; + scopy_(&i__1, &q[cpp1 + cpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1); + +/* Deflate eigenvalues. */ + + slaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[ + iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[ + indxc], &iwork[indxp], &iwork[coltyp], info); + + if (*info != 0) { + goto L20; + } + +/* Solve Secular Equation. */ + + if (k != 0) { + is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp + + 1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2; + slaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda], + &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[ + is], info); + if (*info != 0) { + goto L20; + } + +/* Prepare the INDXQ sorting permutation. 
*/ + + n1 = k; + n2 = *n - k; + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L10: */ + } + } + +L20: + return 0; + +/* End of SLAED1 */ + +} /* slaed1_ */ + +/* Subroutine */ int slaed2_(integer *k, integer *n, integer *n1, real *d__, + real *q, integer *ldq, integer *indxq, real *rho, real *z__, real * + dlamda, real *w, real *q2, integer *indx, integer *indxc, integer * + indxp, integer *coltyp, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + real r__1, r__2, r__3, r__4; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real c__; + static integer i__, j; + static real s, t; + static integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1; + static real eps, tau, tol; + static integer psm[4], imax, jmax, ctot[4]; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), sscal_(integer *, real *, real *, + integer *), scopy_(integer *, real *, integer *, real *, integer * + ); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer + *, integer *, integer *), slacpy_(char *, integer *, integer *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1999 + + + Purpose + ======= + + SLAED2 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. + There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny entry in the + Z vector. 
For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + K (output) INTEGER + The number of non-deflated eigenvalues, and the order of the + related secular equation. 0 <= K <=N. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + N1 (input) INTEGER + The location of the last eigenvalue in the leading sub-matrix. + min(1,N) <= N1 <= N/2. + + D (input/output) REAL array, dimension (N) + On entry, D contains the eigenvalues of the two submatrices to + be combined. + On exit, D contains the trailing (N-K) updated eigenvalues + (those which were deflated) sorted into increasing order. + + Q (input/output) REAL array, dimension (LDQ, N) + On entry, Q contains the eigenvectors of two submatrices in + the two square blocks with corners at (1,1), (N1,N1) + and (N1+1, N1+1), (N,N). + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input/output) INTEGER array, dimension (N) + The permutation which separately sorts the two sub-problems + in D into ascending order. Note that elements in the second + half of this permutation must first have N1 added to their + values. Destroyed on exit. + + RHO (input/output) REAL + On entry, the off-diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. + On exit, RHO has been modified to the value required by + SLAED3. + + Z (input) REAL array, dimension (N) + On entry, Z contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). + On exit, the contents of Z have been destroyed by the updating + process. 
+ + DLAMDA (output) REAL array, dimension (N) + A copy of the first K eigenvalues which will be used by + SLAED3 to form the secular equation. + + W (output) REAL array, dimension (N) + The first k values of the final deflation-altered z-vector + which will be passed to SLAED3. + + Q2 (output) REAL array, dimension (N1**2+(N-N1)**2) + A copy of the first K eigenvectors which will be used by + SLAED3 in a matrix multiply (SGEMM) to solve for the new + eigenvectors. + + INDX (workspace) INTEGER array, dimension (N) + The permutation used to sort the contents of DLAMDA into + ascending order. + + INDXC (output) INTEGER array, dimension (N) + The permutation used to arrange the columns of the deflated + Q matrix into three groups: the first group contains non-zero + elements only at and above N1, the second contains + non-zero elements only below N1, and the third is dense. + + INDXP (workspace) INTEGER array, dimension (N) + The permutation used to place deflated values of D at the end + of the array. INDXP(1:K) points to the nondeflated D-values + and INDXP(K+1:N) points to the deflated eigenvalues. + + COLTYP (workspace/output) INTEGER array, dimension (N) + During execution, a label which will indicate which of the + following types a column in the Q2 matrix is: + 1 : non-zero in the upper half only; + 2 : dense; + 3 : non-zero in the lower half only; + 4 : deflated. + On exit, COLTYP(i) is the number of columns of type i, + for i=1 to 4 only. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + --w; + --q2; + --indx; + --indxc; + --indxp; + --coltyp; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } else /* if(complicated condition) */ { +/* Computing MIN */ + i__1 = 1, i__2 = *n / 2; + if (min(i__1,i__2) > *n1 || *n / 2 < *n1) { + *info = -3; + } + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n2 = *n - *n1; + n1p1 = *n1 + 1; + + if (*rho < 0.f) { + sscal_(&n2, &c_b1150, &z__[n1p1], &c__1); + } + +/* + Normalize z so that norm(z) = 1. Since z is the concatenation of + two normalized vectors, norm2(z) = sqrt(2). +*/ + + t = 1.f / sqrt(2.f); + sscal_(n, &t, &z__[1], &c__1); + +/* RHO = ABS( norm(z)**2 * RHO ) */ + + *rho = (r__1 = *rho * 2.f, dabs(r__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + indxq[i__] += *n1; +/* L10: */ + } + +/* re-integrate the deflated parts from the last pass */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; +/* L20: */ + } + slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indx[i__] = indxq[indxc[i__]]; +/* L30: */ + } + +/* Calculate the allowable deflation tolerance */ + + imax = isamax_(n, &z__[1], &c__1); + jmax = isamax_(n, &d__[1], &c__1); + eps = slamch_("Epsilon"); +/* Computing MAX */ + r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs( + r__2)); + tol = eps * 8.f * dmax(r__3,r__4); + +/* + If the rank-1 modifier is small enough, no more needs to be done + except to reorganize Q so that its columns correspond with the + elements in D. 
+*/ + + if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { + *k = 0; + iq2 = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__ = indx[j]; + scopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + dlamda[j] = d__[i__]; + iq2 += *n; +/* L40: */ + } + slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq); + scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1); + goto L190; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + i__1 = *n1; + for (i__ = 1; i__ <= i__1; ++i__) { + coltyp[i__] = 1; +/* L50: */ + } + i__1 = *n; + for (i__ = n1p1; i__ <= i__1; ++i__) { + coltyp[i__] = 3; +/* L60: */ + } + + + *k = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + nj = indx[j]; + if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + if (j == *n) { + goto L100; + } + } else { + pj = nj; + goto L80; + } +/* L70: */ + } +L80: + ++j; + nj = indx[j]; + if (j > *n) { + goto L100; + } + if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + coltyp[nj] = 4; + indxp[k2] = nj; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[pj]; + c__ = z__[nj]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = slapy2_(&c__, &s); + t = d__[nj] - d__[pj]; + c__ /= tau; + s = -s / tau; + if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { + +/* Deflation is possible. 
*/ + + z__[nj] = tau; + z__[pj] = 0.f; + if (coltyp[nj] != coltyp[pj]) { + coltyp[nj] = 2; + } + coltyp[pj] = 4; + srot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, & + c__, &s); +/* Computing 2nd power */ + r__1 = c__; +/* Computing 2nd power */ + r__2 = s; + t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); +/* Computing 2nd power */ + r__1 = s; +/* Computing 2nd power */ + r__2 = c__; + d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2); + d__[pj] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[pj] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = pj; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = pj; + } + } else { + indxp[k2 + i__ - 1] = pj; + } + pj = nj; + } else { + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + pj = nj; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + dlamda[*k] = d__[pj]; + w[*k] = z__[pj]; + indxp[*k] = pj; + +/* + Count up the total number of the various types of columns, then + form a permutation which positions the four column types into + four uniform groups (although one or more of these groups may be + empty). +*/ + + for (j = 1; j <= 4; ++j) { + ctot[j - 1] = 0; +/* L110: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + ct = coltyp[j]; + ++ctot[ct - 1]; +/* L120: */ + } + +/* PSM(*) = Position in SubMatrix (of types 1 through 4) */ + + psm[0] = 1; + psm[1] = ctot[0] + 1; + psm[2] = psm[1] + ctot[1]; + psm[3] = psm[2] + ctot[2]; + *k = *n - ctot[3]; + +/* + Fill out the INDXC array so that the permutation which it induces + will place all type-1 columns first, all type-2 columns next, + then all type-3's, and finally all type-4's. 
+*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + js = indxp[j]; + ct = coltyp[js]; + indx[psm[ct - 1]] = js; + indxc[psm[ct - 1]] = j; + ++psm[ct - 1]; +/* L130: */ + } + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + i__ = 1; + iq1 = 1; + iq2 = (ctot[0] + ctot[1]) * *n1 + 1; + i__1 = ctot[0]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; +/* L140: */ + } + + i__1 = ctot[1]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1); + scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq1 += *n1; + iq2 += n2; +/* L150: */ + } + + i__1 = ctot[2]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1); + z__[i__] = d__[js]; + ++i__; + iq2 += n2; +/* L160: */ + } + + iq1 = iq2; + i__1 = ctot[3]; + for (j = 1; j <= i__1; ++j) { + js = indx[i__]; + scopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1); + iq2 += *n; + z__[i__] = d__[js]; + ++i__; +/* L170: */ + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. +*/ + + slacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq); + i__1 = *n - *k; + scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1); + +/* Copy CTOT into COLTYP for referencing in SLAED3. 
*/ + + for (j = 1; j <= 4; ++j) { + coltyp[j] = ctot[j - 1]; +/* L180: */ + } + +L190: + return 0; + +/* End of SLAED2 */ + +} /* slaed2_ */ + +/* Subroutine */ int slaed3_(integer *k, integer *n, integer *n1, real *d__, + real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer * + indx, integer *ctot, real *w, real *s, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal), r_sign(real *, real *); + + /* Local variables */ + static integer i__, j, n2, n12, ii, n23, iq2; + static real temp; + extern doublereal snrm2_(integer *, real *, integer *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), scopy_(integer *, real *, + integer *, real *, integer *), slaed4_(integer *, integer *, real + *, real *, real *, real *, real *, integer *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *), slacpy_( + char *, integer *, integer *, real *, integer *, real *, integer * + ), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + SLAED3 finds the roots of the secular equation, as defined by the + values in D, W, and RHO, between 1 and K. It makes the + appropriate calls to SLAED4 and then updates the eigenvectors by + multiplying the matrix of eigenvectors of the pair of eigensystems + being combined by the matrix of eigenvectors of the K-by-K system + which is solved here. + + This code makes very mild assumptions about floating point + arithmetic. 
It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. + + Arguments + ========= + + K (input) INTEGER + The number of terms in the rational function to be solved by + SLAED4. K >= 0. + + N (input) INTEGER + The number of rows and columns in the Q matrix. + N >= K (deflation may result in N>K). + + N1 (input) INTEGER + The location of the last eigenvalue in the leading submatrix. + min(1,N) <= N1 <= N/2. + + D (output) REAL array, dimension (N) + D(I) contains the updated eigenvalues for + 1 <= I <= K. + + Q (output) REAL array, dimension (LDQ,N) + Initially the first K columns are used as workspace. + On output the columns 1 to K contain + the updated eigenvectors. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + RHO (input) REAL + The value of the parameter in the rank one update equation. + RHO >= 0 required. + + DLAMDA (input/output) REAL array, dimension (K) + The first K elements of this array contain the old roots + of the deflated updating problem. These are the poles + of the secular equation. May be changed on output by + having lowest order bit set to zero on Cray X-MP, Cray Y-MP, + Cray-2, or Cray C-90, as described above. + + Q2 (input) REAL array, dimension (LDQ2, N) + The first K columns of this matrix contain the non-deflated + eigenvectors for the split problem. + + INDX (input) INTEGER array, dimension (N) + The permutation used to arrange the columns of the deflated + Q matrix into three groups (see SLAED2). + The rows of the eigenvectors found by SLAED4 must be likewise + permuted before the matrix multiply can take place. + + CTOT (input) INTEGER array, dimension (4) + A count of the total number of the various types of columns + in Q, as described in INDX. 
The fourth column type is any + column which has been deflated. + + W (input/output) REAL array, dimension (K) + The first K elements of this array contain the components + of the deflation-adjusted updating vector. Destroyed on + output. + + S (workspace) REAL array, dimension (N1 + 1)*K + Will contain the eigenvectors of the repaired matrix which + will be multiplied by the previously accumulated eigenvectors + to update the system. + + LDS (input) INTEGER + The leading dimension of S. LDS >= max(1,K). + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + Modified by Francoise Tisseur, University of Tennessee. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --q2; + --indx; + --ctot; + --w; + --s; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*n < *k) { + *info = -2; + } else if (*ldq < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED3", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* + Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), + which on any of these machines zeros out the bottommost + bit of DLAMDA(I) if it is 1; this makes the subsequent + subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation + occurs. 
On binary machines with a guard digit (almost all + machines) it does not change DLAMDA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DLAMDA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. */ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1) { + goto L110; + } + if (*k == 2) { + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + w[1] = q[j * q_dim1 + 1]; + w[2] = q[j * q_dim1 + 2]; + ii = indx[1]; + q[j * q_dim1 + 1] = w[ii]; + ii = indx[2]; + q[j * q_dim1 + 2] = w[ii]; +/* L30: */ + } + goto L110; + } + +/* Compute updated W. */ + + scopy_(k, &w[1], &c__1, &s[1], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L40: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } +/* L60: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = sqrt(-w[i__]); + w[i__] = r_sign(&r__1, &s[i__]); +/* L70: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. 
*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__] = w[i__] / q[i__ + j * q_dim1]; +/* L80: */ + } + temp = snrm2_(k, &s[1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + ii = indx[i__]; + q[i__ + j * q_dim1] = s[ii] / temp; +/* L90: */ + } +/* L100: */ + } + +/* Compute the updated eigenvectors. */ + +L110: + + n2 = *n - *n1; + n12 = ctot[1] + ctot[2]; + n23 = ctot[2] + ctot[3]; + + slacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); + iq2 = *n1 * n12 + 1; + if (n23 != 0) { + sgemm_("N", "N", &n2, k, &n23, &c_b871, &q2[iq2], &n2, &s[1], &n23, & + c_b1101, &q[*n1 + 1 + q_dim1], ldq); + } else { + slaset_("A", &n2, k, &c_b1101, &c_b1101, &q[*n1 + 1 + q_dim1], ldq); + } + + slacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); + if (n12 != 0) { + sgemm_("N", "N", n1, k, &n12, &c_b871, &q2[1], n1, &s[1], &n12, & + c_b1101, &q[q_offset], ldq); + } else { + slaset_("A", n1, k, &c_b1101, &c_b1101, &q[q_dim1 + 1], ldq); + } + + +L120: + return 0; + +/* End of SLAED3 */ + +} /* slaed3_ */ + +/* Subroutine */ int slaed4_(integer *n, integer *i__, real *d__, real *z__, + real *delta, real *rho, real *dlam, integer *info) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real a, b, c__; + static integer j; + static real w; + static integer ii; + static real dw, zz[3]; + static integer ip1; + static real del, eta, phi, eps, tau, psi; + static integer iim1, iip1; + static real dphi, dpsi; + static integer iter; + static real temp, prew, temp1, dltlb, dltub, midpt; + static integer niter; + static logical swtch; + extern /* Subroutine */ int slaed5_(integer *, real *, real *, real *, + real *, real *), slaed6_(integer *, logical *, real *, real *, + real *, real *, real *, integer *); + static logical swtch3; + extern doublereal slamch_(char *); + static logical orgati; + static real 
erretm, rhoinv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + December 23, 1999 + + + Purpose + ======= + + This subroutine computes the I-th updated eigenvalue of a symmetric + rank-one modification to a diagonal matrix whose elements are + given in the array d, and that + + D(i) < D(j) for i < j + + and that RHO > 0. This is arranged by the calling routine, and is + no loss in generality. The rank-one modified system is thus + + diag( D ) + RHO * Z * Z_transpose. + + where we assume the Euclidean norm of Z is 1. + + The method consists of approximating the rational functions in the + secular equation by simpler interpolating rational functions. + + Arguments + ========= + + N (input) INTEGER + The length of all arrays. + + I (input) INTEGER + The index of the eigenvalue to be computed. 1 <= I <= N. + + D (input) REAL array, dimension (N) + The original eigenvalues. It is assumed that they are in + order, D(I) < D(J) for I < J. + + Z (input) REAL array, dimension (N) + The components of the updating vector. + + DELTA (output) REAL array, dimension (N) + If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th + component. If N = 1, then DELTA(1) = 1. The vector DELTA + contains the information necessary to construct the + eigenvectors. + + RHO (input) REAL + The scalar in the symmetric updating formula. + + DLAM (output) REAL + The computed lambda_I, the I-th updated eigenvalue. + + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, the updating process failed. + + Internal Parameters + =================== + + Logical variable ORGATI (origin-at-i?) is used for distinguishing + whether D(i) or D(i+1) is treated as the origin. + + ORGATI = .true. origin at i + ORGATI = .false. origin at i+1 + + Logical variable SWTCH3 (switch-for-3-poles?) is for noting + if we are working with THREE poles! 
+ + MAXIT is the maximum number of iterations allowed for each + eigenvalue. + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Since this routine is called in an inner loop, we do no argument + checking. + + Quick return for N=1 and 2. +*/ + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + *info = 0; + if (*n == 1) { + +/* Presumably, I=1 upon entry */ + + *dlam = d__[1] + *rho * z__[1] * z__[1]; + delta[1] = 1.f; + return 0; + } + if (*n == 2) { + slaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam); + return 0; + } + +/* Compute machine epsilon */ + + eps = slamch_("Epsilon"); + rhoinv = 1.f / *rho; + +/* The case I = N */ + + if (*i__ == *n) { + +/* Initialize some basic variables */ + + ii = *n - 1; + niter = 1; + +/* Calculate initial guess */ + + midpt = *rho / 2.f; + +/* + If ||Z||_2 is not one, then TEMP should be set to + RHO * ||Z||_2^2 / TWO +*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L10: */ + } + + psi = 0.f; + i__1 = *n - 2; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L20: */ + } + + c__ = rhoinv + psi; + w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[* + n]; + + if (w <= 0.f) { + temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho) + + z__[*n] * z__[*n] / *rho; + if (c__ <= temp) { + tau = *rho; + } else { + del = d__[*n] - d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n] + ; + b = z__[*n] * z__[*n] * del; + if (a < 0.f) { + tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + } + } + +/* + It can be proved that + D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO +*/ + + dltlb = midpt; + dltub = *rho; + } else { + del = d__[*n] - 
d__[*n - 1]; + a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]; + b = z__[*n] * z__[*n] * del; + if (a < 0.f) { + tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); + } else { + tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); + } + +/* + It can be proved that + D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 +*/ + + dltlb = 0.f; + dltub = midpt; + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L30: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L40: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.f) { + dltlb = dmax(dltlb,tau); + } else { + dltub = dmin(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * ( + dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (c__ < 0.f) { + c__ = dabs(c__); + } + if (c__ == 0.f) { +/* + ETA = B/A + ETA = RHO - TAU +*/ + eta = dltub - tau; + } else if (a >= 0.f) { + eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( + c__ * 2.f); + } else { + eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. 
+*/ + + if (w * eta > 0.f) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.f) { + eta = (dltub - tau) / 2.f; + } else { + eta = (dltlb - tau) / 2.f; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L50: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L60: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( + dpsi + dphi); + + w = rhoinv + phi + psi; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + *dlam = d__[*i__] + tau; + goto L250; + } + + if (w <= 0.f) { + dltlb = dmax(dltlb,tau); + } else { + dltub = dmin(dltub,tau); + } + +/* Calculate the new step */ + + c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi; + a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * + (dpsi + dphi); + b = delta[*n - 1] * delta[*n] * w; + if (a >= 0.f) { + eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } else { + eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. 
+*/ + + if (w * eta > 0.f) { + eta = -w / (dpsi + dphi); + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.f) { + eta = (dltub - tau) / 2.f; + } else { + eta = (dltlb - tau) / 2.f; + } + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L70: */ + } + + tau += eta; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = ii; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L80: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + temp = z__[*n] / delta[*n]; + phi = z__[*n] * temp; + dphi = temp * temp; + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * + (dpsi + dphi); + + w = rhoinv + phi + psi; +/* L90: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + *dlam = d__[*i__] + tau; + goto L250; + +/* End for the case I = N */ + + } else { + +/* The case for I < N */ + + niter = 1; + ip1 = *i__ + 1; + +/* Calculate initial guess */ + + del = d__[ip1] - d__[*i__]; + midpt = del / 2.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - midpt; +/* L100: */ + } + + psi = 0.f; + i__1 = *i__ - 1; + for (j = 1; j <= i__1; ++j) { + psi += z__[j] * z__[j] / delta[j]; +/* L110: */ + } + + phi = 0.f; + i__1 = *i__ + 2; + for (j = *n; j >= i__1; --j) { + phi += z__[j] * z__[j] / delta[j]; +/* L120: */ + } + c__ = rhoinv + psi + phi; + w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] / + delta[ip1]; + + if (w > 0.f) { + +/* + d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 + + We choose d(i) as origin. 
+*/ + + orgati = TRUE_; + a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; + b = z__[*i__] * z__[*i__] * del; + if (a > 0.f) { + tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } else { + tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } + dltlb = 0.f; + dltub = midpt; + } else { + +/* + (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) + + We choose d(i+1) as origin. +*/ + + orgati = FALSE_; + a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; + b = z__[ip1] * z__[ip1] * del; + if (a < 0.f) { + tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs( + r__1)))); + } else { + tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1)))) + / (c__ * 2.f); + } + dltlb = -midpt; + dltub = 0.f; + } + + if (orgati) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[*i__] - tau; +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] = d__[j] - d__[ip1] - tau; +/* L140: */ + } + } + if (orgati) { + ii = *i__; + } else { + ii = *i__ + 1; + } + iim1 = ii - 1; + iip1 = ii + 1; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L150: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L160: */ + } + + w = rhoinv + phi + psi; + +/* + W is the value of the secular function with + its ii-th element removed. 
+*/ + + swtch3 = FALSE_; + if (orgati) { + if (w < 0.f) { + swtch3 = TRUE_; + } + } else { + if (w > 0.f) { + swtch3 = TRUE_; + } + } + if (ii == 1 || ii == *n) { + swtch3 = FALSE_; + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w += temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f + + dabs(tau) * dw; + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + goto L250; + } + + if (w <= 0.f) { + dltlb = dmax(dltlb,tau); + } else { + dltub = dmin(dltub,tau); + } + +/* Calculate the new step */ + + ++niter; + if (! swtch3) { + if (orgati) { +/* Computing 2nd power */ + r__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (r__1 * + r__1); + } else { +/* Computing 2nd power */ + r__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (r__1 * + r__1); + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] * + dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.f) { + if (a == 0.f) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] * + (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] * + (dpsi + dphi); + } + } + eta = b / a; + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); + } else { + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[ + iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - 
d__[ + iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + zz[1] = z__[ii] * z__[ii]; + slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info); + if (*info != 0) { + goto L250; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.f) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.f) { + eta = (dltub - tau) / 2.f; + } else { + eta = (dltlb - tau) / 2.f; + } + } + + prew = w; + +/* L170: */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L180: */ + } + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L190: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L200: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f + + (r__1 = tau + eta, dabs(r__1)) * dw; + + swtch = FALSE_; + if (orgati) { + if (-w > dabs(prew) / 10.f) { + swtch = TRUE_; + } + } else { + if (w > dabs(prew) / 10.f) { + swtch = TRUE_; + } + } + + tau += eta; + +/* Main loop to update the values of the array DELTA */ + + iter = niter + 1; + + for (niter = iter; niter <= 30; ++niter) { + +/* Test for convergence */ + + if (dabs(w) <= eps * erretm) { + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + 
goto L250; + } + + if (w <= 0.f) { + dltlb = dmax(dltlb,tau); + } else { + dltub = dmin(dltub,tau); + } + +/* Calculate the new step */ + + if (! swtch3) { + if (! swtch) { + if (orgati) { +/* Computing 2nd power */ + r__1 = z__[*i__] / delta[*i__]; + c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * ( + r__1 * r__1); + } else { +/* Computing 2nd power */ + r__1 = z__[ip1] / delta[ip1]; + c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * + (r__1 * r__1); + } + } else { + temp = z__[ii] / delta[ii]; + if (orgati) { + dpsi += temp * temp; + } else { + dphi += temp * temp; + } + c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi; + } + a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] + * dw; + b = delta[*i__] * delta[ip1] * w; + if (c__ == 0.f) { + if (a == 0.f) { + if (! swtch) { + if (orgati) { + a = z__[*i__] * z__[*i__] + delta[ip1] * + delta[ip1] * (dpsi + dphi); + } else { + a = z__[ip1] * z__[ip1] + delta[*i__] * delta[ + *i__] * (dpsi + dphi); + } + } else { + a = delta[*i__] * delta[*i__] * dpsi + delta[ip1] + * delta[ip1] * dphi; + } + } + eta = b / a; + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)) + )) / (c__ * 2.f); + } else { + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, + dabs(r__1)))); + } + } else { + +/* Interpolation using THREE most relevant poles */ + + temp = rhoinv + psi + phi; + if (swtch) { + c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi; + zz[0] = delta[iim1] * delta[iim1] * dpsi; + zz[2] = delta[iip1] * delta[iip1] * dphi; + } else { + if (orgati) { + temp1 = z__[iim1] / delta[iim1]; + temp1 *= temp1; + c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] + - d__[iip1]) * temp1; + zz[0] = z__[iim1] * z__[iim1]; + zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + + dphi); + } else { + temp1 = z__[iip1] / delta[iip1]; + temp1 *= temp1; + c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] + - d__[iim1]) * temp1; + zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi 
- + temp1)); + zz[2] = z__[iip1] * z__[iip1]; + } + } + slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, + info); + if (*info != 0) { + goto L250; + } + } + +/* + Note, eta should be positive if w is negative, and + eta should be negative otherwise. However, + if for some reason caused by roundoff, eta*w > 0, + we simply use one Newton step instead. This way + will guarantee eta*w < 0. +*/ + + if (w * eta >= 0.f) { + eta = -w / dw; + } + temp = tau + eta; + if (temp > dltub || temp < dltlb) { + if (w < 0.f) { + eta = (dltub - tau) / 2.f; + } else { + eta = (dltlb - tau) / 2.f; + } + } + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + delta[j] -= eta; +/* L210: */ + } + + tau += eta; + prew = w; + +/* Evaluate PSI and the derivative DPSI */ + + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; + i__1 = iim1; + for (j = 1; j <= i__1; ++j) { + temp = z__[j] / delta[j]; + psi += z__[j] * temp; + dpsi += temp * temp; + erretm += psi; +/* L220: */ + } + erretm = dabs(erretm); + +/* Evaluate PHI and the derivative DPHI */ + + dphi = 0.f; + phi = 0.f; + i__1 = iip1; + for (j = *n; j >= i__1; --j) { + temp = z__[j] / delta[j]; + phi += z__[j] * temp; + dphi += temp * temp; + erretm += phi; +/* L230: */ + } + + temp = z__[ii] / delta[ii]; + dw = dpsi + dphi + temp * temp; + temp = z__[ii] * temp; + w = rhoinv + phi + psi + temp; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * + 3.f + dabs(tau) * dw; + if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) { + swtch = ! 
swtch; + } + +/* L240: */ + } + +/* Return with INFO = 1, NITER = MAXIT and not converged */ + + *info = 1; + if (orgati) { + *dlam = d__[*i__] + tau; + } else { + *dlam = d__[ip1] + tau; + } + + } + +L250: + + return 0; + +/* End of SLAED4 */ + +} /* slaed4_ */ + +/* Subroutine */ int slaed5_(integer *i__, real *d__, real *z__, real *delta, + real *rho, real *dlam) +{ + /* System generated locals */ + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real b, c__, w, del, tau, temp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + This subroutine computes the I-th eigenvalue of a symmetric rank-one + modification of a 2-by-2 diagonal matrix + + diag( D ) + RHO * Z * transpose(Z) . + + The diagonal elements in the array D are assumed to satisfy + + D(i) < D(j) for i < j . + + We also assume RHO > 0 and that the Euclidean norm of the vector + Z is one. + + Arguments + ========= + + I (input) INTEGER + The index of the eigenvalue to be computed. I = 1 or I = 2. + + D (input) REAL array, dimension (2) + The original eigenvalues. We assume D(1) < D(2). + + Z (input) REAL array, dimension (2) + The components of the updating vector. + + DELTA (output) REAL array, dimension (2) + The vector DELTA contains the information necessary + to construct the eigenvectors. + + RHO (input) REAL + The scalar in the symmetric updating formula. + + DLAM (output) REAL + The computed lambda_I, the I-th updated eigenvalue. 
+ + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --delta; + --z__; + --d__; + + /* Function Body */ + del = d__[2] - d__[1]; + if (*i__ == 1) { + w = *rho * 2.f * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.f; + if (w > 0.f) { + b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[1] * z__[1] * del; + +/* B > ZERO, always */ + + tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1)) + )); + *dlam = d__[1] + tau; + delta[1] = -z__[1] / tau; + delta[2] = z__[2] / (del - tau); + } else { + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.f) { + tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f)); + } else { + tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f; + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + } + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } else { + +/* Now I=2 */ + + b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); + c__ = *rho * z__[2] * z__[2] * del; + if (b > 0.f) { + tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f; + } else { + tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f)); + } + *dlam = d__[2] + tau; + delta[1] = -z__[1] / (del + tau); + delta[2] = -z__[2] / tau; + temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]); + delta[1] /= temp; + delta[2] /= temp; + } + return 0; + +/* End OF SLAED5 */ + +} /* slaed5_ */ + +/* Subroutine */ int slaed6_(integer *kniter, logical *orgati, real *rho, + real *d__, real *z__, real *finit, real *tau, integer *info) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + real r__1, r__2, r__3, r__4; + + /* Builtin functions */ + double sqrt(doublereal), 
log(doublereal), pow_ri(real *, integer *); + + /* Local variables */ + static real a, b, c__, f; + static integer i__; + static real fc, df, ddf, eta, eps, base; + static integer iter; + static real temp, temp1, temp2, temp3, temp4; + static logical scale; + static integer niter; + static real small1, small2, sminv1, sminv2, dscale[3], sclfac; + extern doublereal slamch_(char *); + static real zscale[3], erretm, sclinv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + June 30, 1999 + + + Purpose + ======= + + SLAED6 computes the positive or negative root (closest to the origin) + of + z(1) z(2) z(3) + f(x) = rho + --------- + ---------- + --------- + d(1)-x d(2)-x d(3)-x + + It is assumed that + + if ORGATI = .true. the root is between d(2) and d(3); + otherwise it is between d(1) and d(2) + + This routine will be called by SLAED4 when necessary. In most cases, + the root sought is the smallest in magnitude, though it might not be + in some extremely rare situations. + + Arguments + ========= + + KNITER (input) INTEGER + Refer to SLAED4 for its significance. + + ORGATI (input) LOGICAL + If ORGATI is true, the needed root is between d(2) and + d(3); otherwise it is between d(1) and d(2). See + SLAED4 for further details. + + RHO (input) REAL + Refer to the equation f(x) above. + + D (input) REAL array, dimension (3) + D satisfies d(1) < d(2) < d(3). + + Z (input) REAL array, dimension (3) + Each of the elements in z must be positive. + + FINIT (input) REAL + The value of f at 0. It is more accurate than the one + evaluated inside this routine (if someone wants to do + so). + + TAU (output) REAL + The root of the equation f(x). 
+ + INFO (output) INTEGER + = 0: successful exit + > 0: if INFO = 1, failure to converge + + Further Details + =============== + + Based on contributions by + Ren-Cang Li, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== +*/ + + /* Parameter adjustments */ + --z__; + --d__; + + /* Function Body */ + + *info = 0; + + niter = 1; + *tau = 0.f; + if (*kniter == 2) { + if (*orgati) { + temp = (d__[3] - d__[2]) / 2.f; + c__ = *rho + z__[1] / (d__[1] - d__[2] - temp); + a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3]; + b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2]; + } else { + temp = (d__[1] - d__[2]) / 2.f; + c__ = *rho + z__[3] / (d__[3] - d__[2] - temp); + a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2]; + b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1]; + } +/* Computing MAX */ + r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs( + c__); + temp = dmax(r__1,r__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.f) { + *tau = b / a; + } else if (a <= 0.f) { + *tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( + c__ * 2.f); + } else { + *tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) + + z__[3] / (d__[3] - *tau); + if (dabs(*finit) <= dabs(temp)) { + *tau = 0.f; + } + } + +/* + On first call to routine, get machine parameters for + possible scaling to avoid overflow +*/ + + if (first) { + eps = slamch_("Epsilon"); + base = slamch_("Base"); + i__1 = (integer) (log(slamch_("SafMin")) / log(base) / 3.f) + ; + small1 = pow_ri(&base, &i__1); + sminv1 = 1.f / small1; + small2 = small1 * small1; + sminv2 = sminv1 * sminv1; + first = FALSE_; + } + +/* + Determine if scaling of inputs necessary to avoid overflow + when computing 1/TEMP**3 +*/ + + if (*orgati) { +/* Computing MIN */ + r__3 = (r__1 = d__[2] - *tau, 
dabs(r__1)), r__4 = (r__2 = d__[3] - * + tau, dabs(r__2)); + temp = dmin(r__3,r__4); + } else { +/* Computing MIN */ + r__3 = (r__1 = d__[1] - *tau, dabs(r__1)), r__4 = (r__2 = d__[2] - * + tau, dabs(r__2)); + temp = dmin(r__3,r__4); + } + scale = FALSE_; + if (temp <= small1) { + scale = TRUE_; + if (temp <= small2) { + +/* Scale up by power of radix nearest 1/SAFMIN**(2/3) */ + + sclfac = sminv2; + sclinv = small2; + } else { + +/* Scale up by power of radix nearest 1/SAFMIN**(1/3) */ + + sclfac = sminv1; + sclinv = small1; + } + +/* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__] * sclfac; + zscale[i__ - 1] = z__[i__] * sclfac; +/* L10: */ + } + *tau *= sclfac; + } else { + +/* Copy D and Z to DSCALE and ZSCALE */ + + for (i__ = 1; i__ <= 3; ++i__) { + dscale[i__ - 1] = d__[i__]; + zscale[i__ - 1] = z__[i__]; +/* L20: */ + } + } + + fc = 0.f; + df = 0.f; + ddf = 0.f; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1.f / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + fc += temp1 / dscale[i__ - 1]; + df += temp2; + ddf += temp3; +/* L30: */ + } + f = *finit + *tau * fc; + + if (dabs(f) <= 0.f) { + goto L60; + } + +/* + Iteration begins + + It is not hard to see that + + 1) Iterations will go up monotonically + if FINIT < 0; + + 2) Iterations will go down monotonically + if FINIT > 0. 
+*/ + + iter = niter + 1; + + for (niter = iter; niter <= 20; ++niter) { + + if (*orgati) { + temp1 = dscale[1] - *tau; + temp2 = dscale[2] - *tau; + } else { + temp1 = dscale[0] - *tau; + temp2 = dscale[1] - *tau; + } + a = (temp1 + temp2) * f - temp1 * temp2 * df; + b = temp1 * temp2 * f; + c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf; +/* Computing MAX */ + r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs( + c__); + temp = dmax(r__1,r__2); + a /= temp; + b /= temp; + c__ /= temp; + if (c__ == 0.f) { + eta = b / a; + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( + c__ * 2.f); + } else { + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); + } + if (f * eta >= 0.f) { + eta = -f / df; + } + + temp = eta + *tau; + if (*orgati) { + if (eta > 0.f && temp >= dscale[2]) { + eta = (dscale[2] - *tau) / 2.f; + } + if (eta < 0.f && temp <= dscale[1]) { + eta = (dscale[1] - *tau) / 2.f; + } + } else { + if (eta > 0.f && temp >= dscale[1]) { + eta = (dscale[1] - *tau) / 2.f; + } + if (eta < 0.f && temp <= dscale[0]) { + eta = (dscale[0] - *tau) / 2.f; + } + } + *tau += eta; + + fc = 0.f; + erretm = 0.f; + df = 0.f; + ddf = 0.f; + for (i__ = 1; i__ <= 3; ++i__) { + temp = 1.f / (dscale[i__ - 1] - *tau); + temp1 = zscale[i__ - 1] * temp; + temp2 = temp1 * temp; + temp3 = temp2 * temp; + temp4 = temp1 / dscale[i__ - 1]; + fc += temp4; + erretm += dabs(temp4); + df += temp2; + ddf += temp3; +/* L40: */ + } + f = *finit + *tau * fc; + erretm = (dabs(*finit) + dabs(*tau) * erretm) * 8.f + dabs(*tau) * df; + if (dabs(f) <= eps * erretm) { + goto L60; + } +/* L50: */ + } + *info = 1; +L60: + +/* Undo scaling */ + + if (scale) { + *tau *= sclinv; + } + return 0; + +/* End of SLAED6 */ + +} /* slaed6_ */ + +/* Subroutine */ int slaed7_(integer *icompq, integer *n, integer *qsiz, + integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q, + integer *ldq, integer *indxq, real *rho, 
integer *cutpnt, real * + qstore, integer *qptr, integer *prmptr, integer *perm, integer * + givptr, integer *givcol, real *givnum, real *work, integer *iwork, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, i__1, i__2; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + + /* Local variables */ + static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr, + indxc; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer indxp; + extern /* Subroutine */ int slaed8_(integer *, integer *, integer *, + integer *, real *, real *, integer *, integer *, real *, integer * + , real *, real *, real *, integer *, real *, integer *, integer *, + integer *, real *, integer *, integer *, integer *), slaed9_( + integer *, integer *, integer *, integer *, real *, real *, + integer *, real *, real *, real *, real *, integer *, integer *), + slaeda_(integer *, integer *, integer *, integer *, integer *, + integer *, integer *, integer *, real *, real *, integer *, real * + , real *, integer *); + static integer idlmda; + extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( + integer *, integer *, real *, integer *, integer *, integer *); + static integer coltyp; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + SLAED7 computes the updated eigensystem of a diagonal + matrix after modification by a rank-one symmetric matrix. This + routine is used only for the eigenproblem which requires all + eigenvalues and optionally eigenvectors of a dense symmetric matrix + that has been reduced to tridiagonal form. SLAED1 handles + the case in which all eigenvalues and eigenvectors of a symmetric + tridiagonal matrix are desired. 
+ + T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out) + + where Z = Q'u, u is a vector of length N with ones in the + CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. + + The eigenvectors of the original matrix are stored in Q, and the + eigenvalues are in D. The algorithm consists of three stages: + + The first stage consists of deflating the size of the problem + when there are multiple eigenvalues or if there is a zero in + the Z vector. For each such occurence the dimension of the + secular equation problem is reduced by one. This stage is + performed by the routine SLAED8. + + The second stage consists of calculating the updated + eigenvalues. This is done by finding the roots of the secular + equation via the routine SLAED4 (as called by SLAED9). + This routine also calculates the eigenvectors of the current + problem. + + The final stage consists of computing the updated eigenvectors + directly using the updated eigenvalues. The eigenvectors for + the current problem are multiplied with the eigenvectors from + the overall problem. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= CURLVL <= TLVLS. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). 
+ + D (input/output) REAL array, dimension (N) + On entry, the eigenvalues of the rank-1-perturbed matrix. + On exit, the eigenvalues of the repaired matrix. + + Q (input/output) REAL array, dimension (LDQ, N) + On entry, the eigenvectors of the rank-1-perturbed matrix. + On exit, the eigenvectors of the repaired tridiagonal matrix. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (output) INTEGER array, dimension (N) + The permutation which will reintegrate the subproblem just + solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) + will be in ascending order. + + RHO (input) REAL + The subdiagonal element used to create the rank-1 + modification. + + CUTPNT (input) INTEGER + Contains the location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + QSTORE (input/output) REAL array, dimension (N**2+1) + Stores eigenvectors of submatrices encountered during + divide and conquer, packed together. QPTR points to + beginning of the submatrices. + + QPTR (input/output) INTEGER array, dimension (N+2) + List of indices pointing to beginning of submatrices stored + in QSTORE. The submatrices are numbered starting at the + bottom left of the divide and conquer tree, from left to + right and bottom to top. + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and also the size of + the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. 
+ + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (input) REAL array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + WORK (workspace) REAL array, dimension (3*N+QSIZ*N) + + IWORK (workspace) INTEGER array, dimension (4*N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: if INFO = 1, an eigenvalue did not converge + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --qstore; + --qptr; + --prmptr; + --perm; + --givptr; + givcol -= 3; + givnum -= 3; + --work; + --iwork; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -9; + } else if (min(1,*n) > *cutpnt || *n < *cutpnt) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED7", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* + The following values are for bookkeeping purposes only. They are + integer pointers which indicate the portion of the workspace + used by a particular array in SLAED8 and SLAED9. 
+*/ + + if (*icompq == 1) { + ldq2 = *qsiz; + } else { + ldq2 = *n; + } + + iz = 1; + idlmda = iz + *n; + iw = idlmda + *n; + iq2 = iw + *n; + is = iq2 + *n * ldq2; + + indx = 1; + indxc = indx + *n; + coltyp = indxc + *n; + indxp = coltyp + *n; + +/* + Form the z-vector which consists of the last row of Q_1 and the + first row of Q_2. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *tlvls - i__; + ptr += pow_ii(&c__2, &i__2); +/* L10: */ + } + curr = ptr + *curpbm; + slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], & + givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz + + *n], info); + +/* + When solving the final problem, we no longer need the stored data, + so we will overwrite the data from this level onto the previously + used storage space. +*/ + + if (*curlvl == *tlvls) { + qptr[curr] = 1; + prmptr[curr] = 1; + givptr[curr] = 1; + } + +/* Sort and Deflate eigenvalues. */ + + slaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho, + cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], & + perm[prmptr[curr]], &givptr[curr + 1], &givcol[(givptr[curr] << 1) + + 1], &givnum[(givptr[curr] << 1) + 1], &iwork[indxp], &iwork[ + indx], info); + prmptr[curr + 1] = prmptr[curr] + *n; + givptr[curr + 1] += givptr[curr]; + +/* Solve Secular Equation. */ + + if (k != 0) { + slaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda], + &work[iw], &qstore[qptr[curr]], &k, info); + if (*info != 0) { + goto L30; + } + if (*icompq == 1) { + sgemm_("N", "N", qsiz, &k, &k, &c_b871, &work[iq2], &ldq2, & + qstore[qptr[curr]], &k, &c_b1101, &q[q_offset], ldq); + } +/* Computing 2nd power */ + i__1 = k; + qptr[curr + 1] = qptr[curr] + i__1 * i__1; + +/* Prepare the INDXQ sorting permutation. 
*/ + + n1 = k; + n2 = *n - k; + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); + } else { + qptr[curr + 1] = qptr[curr]; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + indxq[i__] = i__; +/* L20: */ + } + } + +L30: + return 0; + +/* End of SLAED7 */ + +} /* slaed7_ */ + +/* Subroutine */ int slaed8_(integer *icompq, integer *k, integer *n, integer + *qsiz, real *d__, real *q, integer *ldq, integer *indxq, real *rho, + integer *cutpnt, real *z__, real *dlamda, real *q2, integer *ldq2, + real *w, integer *perm, integer *givptr, integer *givcol, real * + givnum, integer *indxp, integer *indx, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, q2_dim1, q2_offset, i__1; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real c__; + static integer i__, j; + static real s, t; + static integer k2, n1, n2, jp, n1p1; + static real eps, tau, tol; + static integer jlam, imax, jmax; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), sscal_(integer *, real *, real *, + integer *), scopy_(integer *, real *, integer *, real *, integer * + ); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer isamax_(integer *, real *, integer *); + extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer + *, integer *, integer *), slacpy_(char *, integer *, integer *, + real *, integer *, real *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + SLAED8 merges the two sets of eigenvalues together into a single + sorted set. Then it tries to deflate the size of the problem. 
+ There are two ways in which deflation can occur: when two or more + eigenvalues are close together or if there is a tiny element in the + Z vector. For each such occurrence the order of the related secular + equation problem is reduced by one. + + Arguments + ========= + + ICOMPQ (input) INTEGER + = 0: Compute eigenvalues only. + = 1: Compute eigenvectors of original dense symmetric matrix + also. On entry, Q contains the orthogonal matrix used + to reduce the original matrix to tridiagonal form. + + K (output) INTEGER + The number of non-deflated eigenvalues, and the order of the + related secular equation. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + QSIZ (input) INTEGER + The dimension of the orthogonal matrix used to reduce + the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. + + D (input/output) REAL array, dimension (N) + On entry, the eigenvalues of the two submatrices to be + combined. On exit, the trailing (N-K) updated eigenvalues + (those which were deflated) sorted into increasing order. + + Q (input/output) REAL array, dimension (LDQ,N) + If ICOMPQ = 0, Q is not referenced. Otherwise, + on entry, Q contains the eigenvectors of the partially solved + system which has been previously updated in matrix + multiplies with other partially solved eigensystems. + On exit, Q contains the trailing (N-K) updated eigenvectors + (those which were deflated) in its last N-K columns. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + INDXQ (input) INTEGER array, dimension (N) + The permutation which separately sorts the two sub-problems + in D into ascending order. Note that elements in the second + half of this permutation must first have CUTPNT added to + their values in order to be accurate. + + RHO (input/output) REAL + On entry, the off-diagonal element associated with the rank-1 + cut which originally split the two submatrices which are now + being recombined. 
+ On exit, RHO has been modified to the value required by + SLAED3. + + CUTPNT (input) INTEGER + The location of the last eigenvalue in the leading + sub-matrix. min(1,N) <= CUTPNT <= N. + + Z (input) REAL array, dimension (N) + On entry, Z contains the updating vector (the last row of + the first sub-eigenvector matrix and the first row of the + second sub-eigenvector matrix). + On exit, the contents of Z are destroyed by the updating + process. + + DLAMDA (output) REAL array, dimension (N) + A copy of the first K eigenvalues which will be used by + SLAED3 to form the secular equation. + + Q2 (output) REAL array, dimension (LDQ2,N) + If ICOMPQ = 0, Q2 is not referenced. Otherwise, + a copy of the first K eigenvectors which will be used by + SLAED7 in a matrix multiply (SGEMM) to update the new + eigenvectors. + + LDQ2 (input) INTEGER + The leading dimension of the array Q2. LDQ2 >= max(1,N). + + W (output) REAL array, dimension (N) + The first k values of the final deflation-altered z-vector and + will be passed to SLAED3. + + PERM (output) INTEGER array, dimension (N) + The permutations (from deflation and sorting) to be applied + to each eigenblock. + + GIVPTR (output) INTEGER + The number of Givens rotations which took place in this + subproblem. + + GIVCOL (output) INTEGER array, dimension (2, N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. + + GIVNUM (output) REAL array, dimension (2, N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + INDXP (workspace) INTEGER array, dimension (N) + The permutation used to place deflated values of D at the end + of the array. INDXP(1:K) points to the nondeflated D-values + and INDXP(K+1:N) points to the deflated eigenvalues. + + INDX (workspace) INTEGER array, dimension (N) + The permutation used to sort the contents of D into ascending + order. + + INFO (output) INTEGER + = 0: successful exit. 
+ < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --indxq; + --z__; + --dlamda; + q2_dim1 = *ldq2; + q2_offset = 1 + q2_dim1; + q2 -= q2_offset; + --w; + --perm; + givcol -= 3; + givnum -= 3; + --indxp; + --indx; + + /* Function Body */ + *info = 0; + + if (*icompq < 0 || *icompq > 1) { + *info = -1; + } else if (*n < 0) { + *info = -3; + } else if (*icompq == 1 && *qsiz < *n) { + *info = -4; + } else if (*ldq < max(1,*n)) { + *info = -7; + } else if (*cutpnt < min(1,*n) || *cutpnt > *n) { + *info = -10; + } else if (*ldq2 < max(1,*n)) { + *info = -14; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED8", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + n1 = *cutpnt; + n2 = *n - n1; + n1p1 = n1 + 1; + + if (*rho < 0.f) { + sscal_(&n2, &c_b1150, &z__[n1p1], &c__1); + } + +/* Normalize z so that norm(z) = 1 */ + + t = 1.f / sqrt(2.f); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + indx[j] = j; +/* L10: */ + } + sscal_(n, &t, &z__[1], &c__1); + *rho = (r__1 = *rho * 2.f, dabs(r__1)); + +/* Sort the eigenvalues into increasing order */ + + i__1 = *n; + for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) { + indxq[i__] += *cutpnt; +/* L20: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = d__[indxq[i__]]; + w[i__] = z__[indxq[i__]]; +/* L30: */ + } + i__ = 1; + j = *cutpnt + 1; + slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = dlamda[indx[i__]]; + z__[i__] = w[indx[i__]]; +/* L40: */ + } + +/* Calculate the allowable deflation tolerence */ + + imax = 
isamax_(n, &z__[1], &c__1); + jmax = isamax_(n, &d__[1], &c__1); + eps = slamch_("Epsilon"); + tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1)); + +/* + If the rank-1 modifier is small enough, no more needs to be done + except to reorganize Q so that its columns correspond with the + elements in D. +*/ + + if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) { + *k = 0; + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; +/* L50: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + perm[j] = indxq[indx[j]]; + scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + + 1], &c__1); +/* L60: */ + } + slacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq); + } + return 0; + } + +/* + If there are multiple eigenvalues then the problem deflates. Here + the number of equal eigenvalues are found. As each equal + eigenvalue is found, an elementary reflector is computed to rotate + the corresponding eigensubspace so that the corresponding + components of Z are zero in this new basis. +*/ + + *k = 0; + *givptr = 0; + k2 = *n + 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + if (j == *n) { + goto L110; + } + } else { + jlam = j; + goto L80; + } +/* L70: */ + } +L80: + ++j; + if (j > *n) { + goto L100; + } + if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) { + +/* Deflate due to small z component. */ + + --k2; + indxp[k2] = j; + } else { + +/* Check if eigenvalues are close enough to allow deflation. */ + + s = z__[jlam]; + c__ = z__[j]; + +/* + Find sqrt(a**2+b**2) without overflow or + destructive underflow. +*/ + + tau = slapy2_(&c__, &s); + t = d__[j] - d__[jlam]; + c__ /= tau; + s = -s / tau; + if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) { + +/* Deflation is possible. 
*/ + + z__[j] = tau; + z__[jlam] = 0.f; + +/* Record the appropriate Givens rotation */ + + ++(*givptr); + givcol[(*givptr << 1) + 1] = indxq[indx[jlam]]; + givcol[(*givptr << 1) + 2] = indxq[indx[j]]; + givnum[(*givptr << 1) + 1] = c__; + givnum[(*givptr << 1) + 2] = s; + if (*icompq == 1) { + srot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[ + indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s); + } + t = d__[jlam] * c__ * c__ + d__[j] * s * s; + d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__; + d__[jlam] = t; + --k2; + i__ = 1; +L90: + if (k2 + i__ <= *n) { + if (d__[jlam] < d__[indxp[k2 + i__]]) { + indxp[k2 + i__ - 1] = indxp[k2 + i__]; + indxp[k2 + i__] = jlam; + ++i__; + goto L90; + } else { + indxp[k2 + i__ - 1] = jlam; + } + } else { + indxp[k2 + i__ - 1] = jlam; + } + jlam = j; + } else { + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + jlam = j; + } + } + goto L80; +L100: + +/* Record the last eigenvalue. */ + + ++(*k); + w[*k] = z__[jlam]; + dlamda[*k] = d__[jlam]; + indxp[*k] = jlam; + +L110: + +/* + Sort the eigenvalues and corresponding eigenvectors into DLAMDA + and Q2 respectively. The eigenvalues/vectors which were not + deflated go into the first K slots of DLAMDA and Q2 respectively, + while those which were deflated go into the last N - K slots. +*/ + + if (*icompq == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; +/* L120: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + jp = indxp[j]; + dlamda[j] = d__[jp]; + perm[j] = indxq[indx[jp]]; + scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1] + , &c__1); +/* L130: */ + } + } + +/* + The deflated eigenvalues and their corresponding vectors go back + into the last N - K slots of D and Q respectively. 
+*/ + + if (*k < *n) { + if (*icompq == 0) { + i__1 = *n - *k; + scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + } else { + i__1 = *n - *k; + scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1); + i__1 = *n - *k; + slacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(* + k + 1) * q_dim1 + 1], ldq); + } + } + + return 0; + +/* End of SLAED8 */ + +} /* slaed8_ */ + +/* Subroutine */ int slaed9_(integer *k, integer *kstart, integer *kstop, + integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, + real *w, real *s, integer *lds, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal), r_sign(real *, real *); + + /* Local variables */ + static integer i__, j; + static real temp; + extern doublereal snrm2_(integer *, real *, integer *); + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slaed4_(integer *, integer *, real *, real *, real *, + real *, real *, integer *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, + Courant Institute, NAG Ltd., and Rice University + September 30, 1994 + + + Purpose + ======= + + SLAED9 finds the roots of the secular equation, as defined by the + values in D, Z, and RHO, between KSTART and KSTOP. It makes the + appropriate calls to SLAED4 and then stores the new matrix of + eigenvectors for use in calculating the next level of Z vectors. + + Arguments + ========= + + K (input) INTEGER + The number of terms in the rational function to be solved by + SLAED4. K >= 0. + + KSTART (input) INTEGER + KSTOP (input) INTEGER + The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP + are to be computed. 1 <= KSTART <= KSTOP <= K. 
+
+    N       (input) INTEGER
+            The number of rows and columns in the Q matrix.
+            N >= K (deflation may result in N > K).
+
+    D       (output) REAL array, dimension (N)
+            D(I) contains the updated eigenvalues
+            for KSTART <= I <= KSTOP.
+
+    Q       (workspace) REAL array, dimension (LDQ,N)
+
+    LDQ     (input) INTEGER
+            The leading dimension of the array Q.  LDQ >= max( 1, N ).
+
+    RHO     (input) REAL
+            The value of the parameter in the rank one update equation.
+            RHO >= 0 required.
+
+    DLAMDA  (input) REAL array, dimension (K)
+            The first K elements of this array contain the old roots
+            of the deflated updating problem.  These are the poles
+            of the secular equation.
+
+    W       (input) REAL array, dimension (K)
+            The first K elements of this array contain the components
+            of the deflation-adjusted updating vector.
+
+    S       (output) REAL array, dimension (LDS, K)
+            Will contain the eigenvectors of the repaired matrix which
+            will be stored for subsequent Z vector calculation and
+            multiplied by the previously accumulated eigenvectors
+            to update the system.
+
+    LDS     (input) INTEGER
+            The leading dimension of S.  LDS >= max( 1, K ).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an eigenvalue did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+    Test the input parameters.
+*/ + + /* Parameter adjustments */ + --d__; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --dlamda; + --w; + s_dim1 = *lds; + s_offset = 1 + s_dim1; + s -= s_offset; + + /* Function Body */ + *info = 0; + + if (*k < 0) { + *info = -1; + } else if (*kstart < 1 || *kstart > max(1,*k)) { + *info = -2; + } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { + *info = -3; + } else if (*n < *k) { + *info = -4; + } else if (*ldq < max(1,*k)) { + *info = -7; + } else if (*lds < max(1,*k)) { + *info = -12; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAED9", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*k == 0) { + return 0; + } + +/* + Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can + be computed with high relative accuracy (barring over/underflow). + This is a problem on machines without a guard digit in + add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). + The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), + which on any of these machines zeros out the bottommost + bit of DLAMDA(I) if it is 1; this makes the subsequent + subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation + occurs. On binary machines with a guard digit (almost all + machines) it does not change DLAMDA(I) at all. On hexadecimal + and decimal machines with a guard digit, it slightly + changes the bottommost bits of DLAMDA(I). It does not account + for hexadecimal or decimal machines without guard digits + (we know of none). We use a subroutine call to compute + 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + this code. +*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; +/* L10: */ + } + + i__1 = *kstop; + for (j = *kstart; j <= i__1; ++j) { + slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], + info); + +/* If the zero finder fails, the computation is terminated. 
*/ + + if (*info != 0) { + goto L120; + } +/* L20: */ + } + + if (*k == 1 || *k == 2) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = *k; + for (j = 1; j <= i__2; ++j) { + s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; +/* L30: */ + } +/* L40: */ + } + goto L120; + } + +/* Compute updated W. */ + + scopy_(k, &w[1], &c__1, &s[s_offset], &c__1); + +/* Initialize W(I) = Q(I,I) */ + + i__1 = *ldq + 1; + scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L50: */ + } + i__2 = *k; + for (i__ = j + 1; i__ <= i__2; ++i__) { + w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); +/* L60: */ + } +/* L70: */ + } + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = sqrt(-w[i__]); + w[i__] = r_sign(&r__1, &s[i__ + s_dim1]); +/* L80: */ + } + +/* Compute eigenvectors of the modified rank-1 modification. */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; +/* L90: */ + } + temp = snrm2_(k, &q[j * q_dim1 + 1], &c__1); + i__2 = *k; + for (i__ = 1; i__ <= i__2; ++i__) { + s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; +/* L100: */ + } +/* L110: */ + } + +L120: + return 0; + +/* End of SLAED9 */ + +} /* slaed9_ */ + +/* Subroutine */ int slaeda_(integer *n, integer *tlvls, integer *curlvl, + integer *curpbm, integer *prmptr, integer *perm, integer *givptr, + integer *givcol, real *givnum, real *q, integer *qptr, real *z__, + real *ztemp, integer *info) +{ + /* System generated locals */ + integer i__1, i__2, i__3; + + /* Builtin functions */ + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + static integer i__, k, mid, ptr, curr; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + static integer bsiz1, 
bsiz2, psiz1, psiz2, zptr1; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), + xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + SLAEDA computes the Z vector corresponding to the merge step in the + CURLVLth step of the merge process with TLVLS steps for the CURPBMth + problem. + + Arguments + ========= + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + TLVLS (input) INTEGER + The total number of merging levels in the overall divide and + conquer tree. + + CURLVL (input) INTEGER + The current level in the overall merge routine, + 0 <= curlvl <= tlvls. + + CURPBM (input) INTEGER + The current problem in the current level in the overall + merge routine (counting from upper left to lower right). + + PRMPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in PERM a + level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) + indicates the size of the permutation and incidentally the + size of the full, non-deflated problem. + + PERM (input) INTEGER array, dimension (N lg N) + Contains the permutations (from deflation and sorting) to be + applied to each eigenblock. + + GIVPTR (input) INTEGER array, dimension (N lg N) + Contains a list of pointers which indicate where in GIVCOL a + level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) + indicates the number of Givens rotations. + + GIVCOL (input) INTEGER array, dimension (2, N lg N) + Each pair of numbers indicates a pair of columns to take place + in a Givens rotation. 
+ + GIVNUM (input) REAL array, dimension (2, N lg N) + Each number indicates the S value to be used in the + corresponding Givens rotation. + + Q (input) REAL array, dimension (N**2) + Contains the square eigenblocks from previous levels, the + starting positions for blocks are given by QPTR. + + QPTR (input) INTEGER array, dimension (N+2) + Contains a list of pointers which indicate where in Q an + eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates + the size of the block. + + Z (output) REAL array, dimension (N) + On output this vector contains the updating vector (the last + row of the first sub-eigenvector matrix and the first row of + the second sub-eigenvector matrix). + + ZTEMP (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + --ztemp; + --z__; + --qptr; + --q; + givnum -= 3; + givcol -= 3; + --givptr; + --perm; + --prmptr; + + /* Function Body */ + *info = 0; + + if (*n < 0) { + *info = -1; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAEDA", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine location of first number in second half. */ + + mid = *n / 2 + 1; + +/* Gather last/first rows of appropriate eigenblocks into center of Z */ + + ptr = 1; + +/* + Determine location of lowest level subproblem in the full storage + scheme +*/ + + i__1 = *curlvl - 1; + curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1; + +/* + Determine size of these matrices. We add HALF to the value of + the SQRT in case the machine underestimates one of these square + roots. 
+*/ + + bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); + bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f); + i__1 = mid - bsiz1 - 1; + for (k = 1; k <= i__1; ++k) { + z__[k] = 0.f; +/* L10: */ + } + scopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], & + c__1); + scopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1); + i__1 = *n; + for (k = mid + bsiz2; k <= i__1; ++k) { + z__[k] = 0.f; +/* L20: */ + } + +/* + Loop thru remaining levels 1 -> CURLVL applying the Givens + rotations and permutation and then multiplying the center matrices + against the current Z. +*/ + + ptr = pow_ii(&c__2, tlvls) + 1; + i__1 = *curlvl - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = *curlvl - k; + i__3 = *curlvl - k - 1; + curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) - + 1; + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + zptr1 = mid - psiz1; + +/* Apply Givens at CURR and CURR+1 */ + + i__2 = givptr[curr + 1] - 1; + for (i__ = givptr[curr]; i__ <= i__2; ++i__) { + srot_(&c__1, &z__[zptr1 + givcol[(i__ << 1) + 1] - 1], &c__1, & + z__[zptr1 + givcol[(i__ << 1) + 2] - 1], &c__1, &givnum[( + i__ << 1) + 1], &givnum[(i__ << 1) + 2]); +/* L30: */ + } + i__2 = givptr[curr + 2] - 1; + for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) { + srot_(&c__1, &z__[mid - 1 + givcol[(i__ << 1) + 1]], &c__1, &z__[ + mid - 1 + givcol[(i__ << 1) + 2]], &c__1, &givnum[(i__ << + 1) + 1], &givnum[(i__ << 1) + 2]); +/* L40: */ + } + psiz1 = prmptr[curr + 1] - prmptr[curr]; + psiz2 = prmptr[curr + 2] - prmptr[curr + 1]; + i__2 = psiz1 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1]; +/* L50: */ + } + i__2 = psiz2 - 1; + for (i__ = 0; i__ <= i__2; ++i__) { + ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] - + 1]; +/* L60: */ + } + +/* + Multiply Blocks at CURR and CURR+1 + + Determine size of these matrices. 
We add HALF to the value of + the SQRT in case the machine underestimates one of these + square roots. +*/ + + bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f); + bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + + .5f); + if (bsiz1 > 0) { + sgemv_("T", &bsiz1, &bsiz1, &c_b871, &q[qptr[curr]], &bsiz1, & + ztemp[1], &c__1, &c_b1101, &z__[zptr1], &c__1); + } + i__2 = psiz1 - bsiz1; + scopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1); + if (bsiz2 > 0) { + sgemv_("T", &bsiz2, &bsiz2, &c_b871, &q[qptr[curr + 1]], &bsiz2, & + ztemp[psiz1 + 1], &c__1, &c_b1101, &z__[mid], &c__1); + } + i__2 = psiz2 - bsiz2; + scopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], & + c__1); + + i__2 = *tlvls - k; + ptr += pow_ii(&c__2, &i__2); +/* L70: */ + } + + return 0; + +/* End of SLAEDA */ + +} /* slaeda_ */ + +/* Subroutine */ int slaev2_(real *a, real *b, real *c__, real *rt1, real * + rt2, real *cs1, real *sn1) +{ + /* System generated locals */ + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real ab, df, cs, ct, tb, sm, tn, rt, adf, acs; + static integer sgn1, sgn2; + static real acmn, acmx; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix + [ A B ] + [ B C ]. + On return, RT1 is the eigenvalue of larger absolute value, RT2 is the + eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right + eigenvector for RT1, giving the decomposition + + [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] + [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. + + Arguments + ========= + + A (input) REAL + The (1,1) element of the 2-by-2 matrix. 
+ + B (input) REAL + The (1,2) element and the conjugate of the (2,1) element of + the 2-by-2 matrix. + + C (input) REAL + The (2,2) element of the 2-by-2 matrix. + + RT1 (output) REAL + The eigenvalue of larger absolute value. + + RT2 (output) REAL + The eigenvalue of smaller absolute value. + + CS1 (output) REAL + SN1 (output) REAL + The vector (CS1, SN1) is a unit right eigenvector for RT1. + + Further Details + =============== + + RT1 is accurate to a few ulps barring over/underflow. + + RT2 may be inaccurate if there is massive cancellation in the + determinant A*C-B*B; higher precision or correctly rounded or + correctly truncated arithmetic would be needed to compute RT2 + accurately in all cases. + + CS1 and SN1 are accurate to a few ulps barring over/underflow. + + Overflow is possible only if RT1 is within a factor of 5 of overflow. + Underflow is harmless if the input data is 0 or exceeds + underflow_threshold / macheps. + + ===================================================================== + + + Compute the eigenvalues +*/ + + sm = *a + *c__; + df = *a - *c__; + adf = dabs(df); + tb = *b + *b; + ab = dabs(tb); + if (dabs(*a) > dabs(*c__)) { + acmx = *a; + acmn = *c__; + } else { + acmx = *c__; + acmn = *a; + } + if (adf > ab) { +/* Computing 2nd power */ + r__1 = ab / adf; + rt = adf * sqrt(r__1 * r__1 + 1.f); + } else if (adf < ab) { +/* Computing 2nd power */ + r__1 = adf / ab; + rt = ab * sqrt(r__1 * r__1 + 1.f); + } else { + +/* Includes case AB=ADF=0 */ + + rt = ab * sqrt(2.f); + } + if (sm < 0.f) { + *rt1 = (sm - rt) * .5f; + sgn1 = -1; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. +*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else if (sm > 0.f) { + *rt1 = (sm + rt) * .5f; + sgn1 = 1; + +/* + Order of execution important. + To get fully accurate smaller eigenvalue, + next line needs to be executed in higher precision. 
+*/ + + *rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b; + } else { + +/* Includes case RT1 = RT2 = 0 */ + + *rt1 = rt * .5f; + *rt2 = rt * -.5f; + sgn1 = 1; + } + +/* Compute the eigenvector */ + + if (df >= 0.f) { + cs = df + rt; + sgn2 = 1; + } else { + cs = df - rt; + sgn2 = -1; + } + acs = dabs(cs); + if (acs > ab) { + ct = -tb / cs; + *sn1 = 1.f / sqrt(ct * ct + 1.f); + *cs1 = ct * *sn1; + } else { + if (ab == 0.f) { + *cs1 = 1.f; + *sn1 = 0.f; + } else { + tn = -cs / tb; + *cs1 = 1.f / sqrt(tn * tn + 1.f); + *sn1 = tn * *cs1; + } + } + if (sgn1 == sgn2) { + tn = *cs1; + *cs1 = -(*sn1); + *sn1 = tn; + } + return 0; + +/* End of SLAEV2 */ + +} /* slaev2_ */ + +/* Subroutine */ int slahqr_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * + wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer * + info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2; + + /* Builtin functions */ + double sqrt(doublereal), r_sign(real *, real *); + + /* Local variables */ + static integer i__, j, k, l, m; + static real s, v[3]; + static integer i1, i2; + static real t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, h33, + h44; + static integer nh; + static real cs; + static integer nr; + static real sn; + static integer nz; + static real ave, h33s, h44s; + static integer itn, its; + static real ulp, sum, tst1, h43h34, disc, unfl, ovfl, work[1]; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *), scopy_(integer *, real *, integer *, + real *, integer *), slanv2_(real *, real *, real *, real *, real * + , real *, real *, real *, real *, real *), slabad_(real *, real *) + ; + extern doublereal slamch_(char *); + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *); + extern doublereal slanhs_(char *, integer *, real *, integer *, real *); + static 
real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLAHQR is an auxiliary routine called by SHSEQR to update the + eigenvalues and Schur decomposition already computed by SHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to IHI. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper quasi-triangular in + rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless + ILO = 1). SLAHQR works primarily with the Hessenberg + submatrix in rows and columns ILO to IHI, but applies + transformations to all of H if WANTT is .TRUE.. + 1 <= ILO <= max(1,IHI); IHI <= N. + + H (input/output) REAL array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if WANTT is .TRUE., H is upper quasi-triangular in + rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in + standard form. If WANTT is .FALSE., the contents of H are + unspecified on exit. + + LDH (input) INTEGER + The leading dimension of the array H. LDH >= max(1,N). + + WR (output) REAL array, dimension (N) + WI (output) REAL array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues ILO to IHI are stored in the corresponding + elements of WR and WI. If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) > 0 and WI(i+1) < 0. 
If WANTT is .TRUE., the + eigenvalues are stored in the same order as on the diagonal + of the Schur form returned in H, with WR(i) = H(i,i), and, if + H(i:i+1,i:i+1) is a 2-by-2 diagonal block, + WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. + + Z (input/output) REAL array, dimension (LDZ,N) + If WANTZ is .TRUE., on entry Z must contain the current + matrix Z of transformations accumulated by SHSEQR, and on + exit Z has been updated; transformations are applied only to + the submatrix Z(ILOZ:IHIZ,ILO:IHI). + If WANTZ is .FALSE., Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + > 0: SLAHQR failed to compute all the eigenvalues ILO to IHI + in a total of 30*(IHI-ILO+1) iterations; if INFO = i, + elements i+1:ihi of WR and WI contain those eigenvalues + which have been successfully computed. + + Further Details + =============== + + 2-96 Based on modifications by + David Day, Sandia National Laboratory, USA + + ===================================================================== +*/ + + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.f; + return 0; + } + + nh = *ihi - *ilo + 1; + nz = *ihiz - *iloz + 1; + +/* + Set machine-dependent constants for the stopping criterion. + If norm(H) <= sqrt(OVFL), overflow should not occur. 
+*/ + + unfl = slamch_("Safe minimum"); + ovfl = 1.f / unfl; + slabad_(&unfl, &ovfl); + ulp = slamch_("Precision"); + smlnum = unfl * (nh / ulp); + +/* + I1 and I2 are the indices of the first row and last column of H + to which transformations must be applied. If eigenvalues only are + being computed, I1 and I2 are set inside the main loop. +*/ + + if (*wantt) { + i1 = 1; + i2 = *n; + } + +/* ITN is the total number of QR iterations allowed. */ + + itn = nh * 30; + +/* + The main loop begins here. I is the loop index and decreases from + IHI to ILO in steps of 1 or 2. Each iteration of the loop works + with the active submatrix in rows and columns L to I. + Eigenvalues I+1 to IHI have already converged. Either L = ILO or + H(L,L-1) is negligible so that the matrix splits. +*/ + + i__ = *ihi; +L10: + l = *ilo; + if (i__ < *ilo) { + goto L150; + } + +/* + Perform QR iterations on rows and columns ILO to I until a + submatrix of order 1 or 2 splits off at the bottom because a + subdiagonal element has become negligible. +*/ + + i__1 = itn; + for (its = 0; its <= i__1; ++its) { + +/* Look for a single small subdiagonal element. */ + + i__2 = l + 1; + for (k = i__; k >= i__2; --k) { + tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 + = h__[k + k * h_dim1], dabs(r__2)); + if (tst1 == 0.f) { + i__3 = i__ - l + 1; + tst1 = slanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work); + } +/* Computing MAX */ + r__2 = ulp * tst1; + if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2, + smlnum)) { + goto L30; + } +/* L20: */ + } +L30: + l = k; + if (l > *ilo) { + +/* H(L,L-1) is negligible */ + + h__[l + (l - 1) * h_dim1] = 0.f; + } + +/* Exit from loop if a submatrix of order 1 or 2 has split off. */ + + if (l >= i__ - 1) { + goto L140; + } + +/* + Now the active submatrix is in rows and columns L to I. If + eigenvalues only are being computed, only the active submatrix + need be transformed. +*/ + + if (! 
(*wantt)) { + i1 = l; + i2 = i__; + } + + if (its == 10 || its == 20) { + +/* Exceptional shift. */ + + s = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1)) + (r__2 = + h__[i__ - 1 + (i__ - 2) * h_dim1], dabs(r__2)); + h44 = s * .75f + h__[i__ + i__ * h_dim1]; + h33 = h44; + h43h34 = s * -.4375f * s; + } else { + +/* + Prepare to use Francis' double shift + (i.e. 2nd degree generalized Rayleigh quotient) +*/ + + h44 = h__[i__ + i__ * h_dim1]; + h33 = h__[i__ - 1 + (i__ - 1) * h_dim1]; + h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * + h_dim1]; + s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) * + h_dim1]; + disc = (h33 - h44) * .5f; + disc = disc * disc + h43h34; + if (disc > 0.f) { + +/* Real roots: use Wilkinson's shift twice */ + + disc = sqrt(disc); + ave = (h33 + h44) * .5f; + if (dabs(h33) - dabs(h44) > 0.f) { + h33 = h33 * h44 - h43h34; + h44 = h33 / (r_sign(&disc, &ave) + ave); + } else { + h44 = r_sign(&disc, &ave) + ave; + } + h33 = h44; + h43h34 = 0.f; + } + } + +/* Look for two consecutive small subdiagonal elements. */ + + i__2 = l; + for (m = i__ - 2; m >= i__2; --m) { +/* + Determine the effect of starting the double-shift QR + iteration at row M, and see if this would make H(M,M-1) + negligible. 
+*/ + + h11 = h__[m + m * h_dim1]; + h22 = h__[m + 1 + (m + 1) * h_dim1]; + h21 = h__[m + 1 + m * h_dim1]; + h12 = h__[m + (m + 1) * h_dim1]; + h44s = h44 - h11; + h33s = h33 - h11; + v1 = (h33s * h44s - h43h34) / h21 + h12; + v2 = h22 - h11 - h33s - h44s; + v3 = h__[m + 2 + (m + 1) * h_dim1]; + s = dabs(v1) + dabs(v2) + dabs(v3); + v1 /= s; + v2 /= s; + v3 /= s; + v[0] = v1; + v[1] = v2; + v[2] = v3; + if (m == l) { + goto L50; + } + h00 = h__[m - 1 + (m - 1) * h_dim1]; + h10 = h__[m + (m - 1) * h_dim1]; + tst1 = dabs(v1) * (dabs(h00) + dabs(h11) + dabs(h22)); + if (dabs(h10) * (dabs(v2) + dabs(v3)) <= ulp * tst1) { + goto L50; + } +/* L40: */ + } +L50: + +/* Double-shift QR step */ + + i__2 = i__ - 1; + for (k = m; k <= i__2; ++k) { + +/* + The first iteration of this loop determines a reflection G + from the vector V and applies it from left and right to H, + thus creating a nonzero bulge below the subdiagonal. + + Each subsequent iteration determines a reflection G to + restore the Hessenberg form in the (K-1)th column, and thus + chases the bulge one step toward the bottom of the active + submatrix. NR is the order of G. + + Computing MIN +*/ + i__3 = 3, i__4 = i__ - k + 1; + nr = min(i__3,i__4); + if (k > m) { + scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); + } + slarfg_(&nr, v, &v[1], &c__1, &t1); + if (k > m) { + h__[k + (k - 1) * h_dim1] = v[0]; + h__[k + 1 + (k - 1) * h_dim1] = 0.f; + if (k < i__ - 1) { + h__[k + 2 + (k - 1) * h_dim1] = 0.f; + } + } else if (m > l) { + h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1]; + } + v2 = v[1]; + t2 = t1 * v2; + if (nr == 3) { + v3 = v[2]; + t3 = t1 * v3; + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. 
+*/ + + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + + v3 * h__[k + 2 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; + h__[k + 2 + j * h_dim1] -= sum * t3; +/* L60: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). + + Computing MIN +*/ + i__4 = k + 3; + i__3 = min(i__4,i__); + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + + v3 * h__[j + (k + 2) * h_dim1]; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; + h__[j + (k + 2) * h_dim1] -= sum * t3; +/* L70: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; + z__[j + (k + 2) * z_dim1] -= sum * t3; +/* L80: */ + } + } + } else if (nr == 2) { + +/* + Apply G from the left to transform the rows of the matrix + in columns K to I2. +*/ + + i__3 = i2; + for (j = k; j <= i__3; ++j) { + sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; + h__[k + j * h_dim1] -= sum * t1; + h__[k + 1 + j * h_dim1] -= sum * t2; +/* L90: */ + } + +/* + Apply G from the right to transform the columns of the + matrix in rows I1 to min(K+3,I). 
+*/ + + i__3 = i__; + for (j = i1; j <= i__3; ++j) { + sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + ; + h__[j + k * h_dim1] -= sum * t1; + h__[j + (k + 1) * h_dim1] -= sum * t2; +/* L100: */ + } + + if (*wantz) { + +/* Accumulate transformations in the matrix Z */ + + i__3 = *ihiz; + for (j = *iloz; j <= i__3; ++j) { + sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * + z_dim1]; + z__[j + k * z_dim1] -= sum * t1; + z__[j + (k + 1) * z_dim1] -= sum * t2; +/* L110: */ + } + } + } +/* L120: */ + } + +/* L130: */ + } + +/* Failure to converge in remaining number of iterations */ + + *info = i__; + return 0; + +L140: + + if (l == i__) { + +/* H(I,I-1) is negligible: one eigenvalue has converged. */ + + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.f; + } else if (l == i__ - 1) { + +/* + H(I-1,I-2) is negligible: a pair of eigenvalues have converged. + + Transform the 2-by-2 submatrix to standard Schur form, + and compute and store the eigenvalues. +*/ + + slanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ * + h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ * + h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs, + &sn); + + if (*wantt) { + +/* Apply the transformation to the rest of H. */ + + if (i2 > i__) { + i__1 = i2 - i__; + srot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[ + i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn); + } + i__1 = i__ - i1 - 1; + srot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ * + h_dim1], &c__1, &cs, &sn); + } + if (*wantz) { + +/* Apply the transformation to Z. */ + + srot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz + + i__ * z_dim1], &c__1, &cs, &sn); + } + } + +/* + Decrement number of remaining iterations, and return to start of + the main loop with new value of I. 
+*/ + + itn -= its; + i__ = l - 1; + goto L10; + +L150: + return 0; + +/* End of SLAHQR */ + +} /* slahqr_ */ + +/* Subroutine */ int slahrd_(integer *n, integer *k, integer *nb, real *a, + integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy) +{ + /* System generated locals */ + integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, + i__3; + real r__1; + + /* Local variables */ + static integer i__; + static real ei; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *), scopy_( + integer *, real *, integer *, real *, integer *), saxpy_(integer * + , real *, real *, integer *, real *, integer *), strmv_(char *, + char *, char *, integer *, real *, integer *, real *, integer *), slarfg_(integer *, real *, real *, + integer *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLAHRD reduces the first NB columns of a real general n-by-(n-k+1) + matrix A so that elements below the k-th subdiagonal are zero. The + reduction is performed by an orthogonal similarity transformation + Q' * A * Q. The routine returns the matrices V and T which determine + Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. + + This is an auxiliary routine called by SGEHRD. + + Arguments + ========= + + N (input) INTEGER + The order of the matrix A. + + K (input) INTEGER + The offset for the reduction. Elements below the k-th + subdiagonal in the first NB columns are reduced to zero. + + NB (input) INTEGER + The number of columns to be reduced. + + A (input/output) REAL array, dimension (LDA,N-K+1) + On entry, the n-by-(n-k+1) general matrix A. 
+ On exit, the elements on and above the k-th subdiagonal in + the first NB columns are overwritten with the corresponding + elements of the reduced matrix; the elements below the k-th + subdiagonal, with the array TAU, represent the matrix Q as a + product of elementary reflectors. The other columns of A are + unchanged. See Further Details. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + TAU (output) REAL array, dimension (NB) + The scalar factors of the elementary reflectors. See Further + Details. + + T (output) REAL array, dimension (LDT,NB) + The upper triangular matrix T. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= NB. + + Y (output) REAL array, dimension (LDY,NB) + The n-by-nb matrix Y. + + LDY (input) INTEGER + The leading dimension of the array Y. LDY >= N. + + Further Details + =============== + + The matrix Q is represented as a product of nb elementary reflectors + + Q = H(1) H(2) . . . H(nb). + + Each H(i) has the form + + H(i) = I - tau * v * v' + + where tau is a real scalar, and v is a real vector with + v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in + A(i+k+1:n,i), and tau in TAU(i). + + The elements of the vectors v together form the (n-k+1)-by-nb matrix + V which is needed, with T and Y, to apply the transformation to the + unreduced part of the matrix, using an update of the form: + A := (I - V*T*V') * (A - Y*V'). + + The contents of A on exit are illustrated by the following example + with n = 7, k = 3 and nb = 2: + + ( a h a a a ) + ( a h a a a ) + ( a h a a a ) + ( h h a a a ) + ( v1 h a a a ) + ( v1 v2 a a a ) + ( v1 v2 a a a ) + + where a denotes an element of the original matrix A, h denotes a + modified element of the upper Hessenberg matrix H, and vi denotes an + element of the vector defining H(i). 
+ + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + --tau; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + y_dim1 = *ldy; + y_offset = 1 + y_dim1; + y -= y_offset; + + /* Function Body */ + if (*n <= 1) { + return 0; + } + + i__1 = *nb; + for (i__ = 1; i__ <= i__1; ++i__) { + if (i__ > 1) { + +/* + Update A(1:n,i) + + Compute i-th column of A - Y * V' +*/ + + i__2 = i__ - 1; + sgemv_("No transpose", n, &i__2, &c_b1150, &y[y_offset], ldy, &a[* + k + i__ - 1 + a_dim1], lda, &c_b871, &a[i__ * a_dim1 + 1], + &c__1); + +/* + Apply I - V * T' * V' to this column (call it b) from the + left, using the last column of T as workspace + + Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) + ( V2 ) ( b2 ) + + where V1 is unit lower triangular + + w := V1' * b1 +*/ + + i__2 = i__ - 1; + scopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + + 1], &c__1); + i__2 = i__ - 1; + strmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1); + +/* w := w + V2'*b2 */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[*k + i__ + a_dim1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b871, &t[*nb * + t_dim1 + 1], &c__1); + +/* w := T'*w */ + + i__2 = i__ - 1; + strmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[*nb * t_dim1 + 1], &c__1); + +/* b2 := b2 - V2*w */ + + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[*k + i__ + + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b871, &a[*k + + i__ + i__ * a_dim1], &c__1); + +/* b1 := b1 - V1*w */ + + i__2 = i__ - 1; + strmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] + , lda, &t[*nb * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + saxpy_(&i__2, &c_b1150, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + + i__ * a_dim1], 
&c__1); + + a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei; + } + +/* + Generate the elementary reflector H(i) to annihilate + A(k+i+1:n,i) +*/ + + i__2 = *n - *k - i__ + 1; +/* Computing MIN */ + i__3 = *k + i__ + 1; + slarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + ei = a[*k + i__ + i__ * a_dim1]; + a[*k + i__ + i__ * a_dim1] = 1.f; + +/* Compute Y(1:n,i) */ + + i__2 = *n - *k - i__ + 1; + sgemv_("No transpose", n, &i__2, &c_b871, &a[(i__ + 1) * a_dim1 + 1], + lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b1101, &y[i__ * + y_dim1 + 1], &c__1); + i__2 = *n - *k - i__ + 1; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[*k + i__ + a_dim1], lda, + &a[*k + i__ + i__ * a_dim1], &c__1, &c_b1101, &t[i__ * + t_dim1 + 1], &c__1); + i__2 = i__ - 1; + sgemv_("No transpose", n, &i__2, &c_b1150, &y[y_offset], ldy, &t[i__ * + t_dim1 + 1], &c__1, &c_b871, &y[i__ * y_dim1 + 1], &c__1); + sscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); + +/* Compute T(1:i,i) */ + + i__2 = i__ - 1; + r__1 = -tau[i__]; + sscal_(&i__2, &r__1, &t[i__ * t_dim1 + 1], &c__1); + i__2 = i__ - 1; + strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, + &t[i__ * t_dim1 + 1], &c__1) + ; + t[i__ + i__ * t_dim1] = tau[i__]; + +/* L10: */ + } + a[*k + *nb + *nb * a_dim1] = ei; + + return 0; + +/* End of SLAHRD */ + +} /* slahrd_ */ + +/* Subroutine */ int slaln2_(logical *ltrans, integer *na, integer *nw, real * + smin, real *ca, real *a, integer *lda, real *d1, real *d2, real *b, + integer *ldb, real *wr, real *wi, real *x, integer *ldx, real *scale, + real *xnorm, integer *info) +{ + /* Initialized data */ + + static logical cswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; + static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; + static integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2, + 4,3,2,1 }; + + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset; + real r__1, r__2, 
r__3, r__4, r__5, r__6; + static real equiv_0[4], equiv_1[4]; + + /* Local variables */ + static integer j; +#define ci (equiv_0) +#define cr (equiv_1) + static real bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, cr21, + cr22, li21, csi, ui11, lr21, ui12, ui22; +#define civ (equiv_0) + static real csr, ur11, ur12, ur22; +#define crv (equiv_1) + static real bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs; + static integer icmax; + static real bnorm, cnorm, smini; + extern doublereal slamch_(char *); + static real bignum; + extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real * + , real *); + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLALN2 solves a system of the form (ca A - w D ) X = s B + or (ca A' - w D) X = s B with possible scaling ("s") and + perturbation of A. (A' means A-transpose.) + + A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA + real diagonal matrix, w is a real or complex value, and X and B are + NA x 1 matrices -- real if w is real, complex if w is complex. NA + may be 1 or 2. + + If w is complex, X and B are represented as NA x 2 matrices, + the first column of each being the real part and the second + being the imaginary part. + + "s" is a scaling factor (.LE. 1), computed by SLALN2, which is + so chosen that X can be computed without overflow. X is further + scaled if necessary to assure that norm(ca A - w D)*norm(X) is less + than overflow. + + If both singular values of (ca A - w D) are less than SMIN, + SMIN*identity will be used instead of (ca A - w D). If only one + singular value is less than SMIN, one element of (ca A - w D) will be + perturbed enough to make the smallest singular value roughly SMIN. + If both singular values are at least SMIN, (ca A - w D) will not be + perturbed. 
In any case, the perturbation will be at most some small + multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values + are computed by infinity-norm approximations, and thus will only be + correct to a factor of 2 or so. + + Note: all input quantities are assumed to be smaller than overflow + by a reasonable factor. (See BIGNUM.) + + Arguments + ========== + + LTRANS (input) LOGICAL + =.TRUE.: A-transpose will be used. + =.FALSE.: A will be used (not transposed.) + + NA (input) INTEGER + The size of the matrix A. It may (only) be 1 or 2. + + NW (input) INTEGER + 1 if "w" is real, 2 if "w" is complex. It may only be 1 + or 2. + + SMIN (input) REAL + The desired lower bound on the singular values of A. This + should be a safe distance away from underflow or overflow, + say, between (underflow/machine precision) and (machine + precision * overflow ). (See BIGNUM and ULP.) + + CA (input) REAL + The coefficient c, which A is multiplied by. + + A (input) REAL array, dimension (LDA,NA) + The NA x NA matrix A. + + LDA (input) INTEGER + The leading dimension of A. It must be at least NA. + + D1 (input) REAL + The 1,1 element in the diagonal matrix D. + + D2 (input) REAL + The 2,2 element in the diagonal matrix D. Not used if NW=1. + + B (input) REAL array, dimension (LDB,NW) + The NA x NW matrix B (right-hand side). If NW=2 ("w" is + complex), column 1 contains the real part of B and column 2 + contains the imaginary part. + + LDB (input) INTEGER + The leading dimension of B. It must be at least NA. + + WR (input) REAL + The real part of the scalar "w". + + WI (input) REAL + The imaginary part of the scalar "w". Not used if NW=1. + + X (output) REAL array, dimension (LDX,NW) + The NA x NW matrix X (unknowns), as computed by SLALN2. + If NW=2 ("w" is complex), on exit, column 1 will contain + the real part of X and column 2 will contain the imaginary + part. + + LDX (input) INTEGER + The leading dimension of X. It must be at least NA. 
+ + SCALE (output) REAL + The scale factor that B must be multiplied by to insure + that overflow does not occur when computing X. Thus, + (ca A - w D) X will be SCALE*B, not B (ignoring + perturbations of A.) It will be at most 1. + + XNORM (output) REAL + The infinity-norm of X, when X is regarded as an NA x NW + real matrix. + + INFO (output) INTEGER + An error flag. It will be set to zero if no error occurs, + a negative number if an argument is in error, or a positive + number if ca A - w D had to be perturbed. + The possible values are: + = 0: No error occurred, and (ca A - w D) did not have to be + perturbed. + = 1: (ca A - w D) had to be perturbed to make its smallest + (or only) singular value greater than SMIN. + NOTE: In the interests of speed, this routine does not + check the inputs for errors. + + ===================================================================== +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body */ + +/* Compute BIGNUM */ + + smlnum = 2.f * slamch_("Safe minimum"); + bignum = 1.f / smlnum; + smini = dmax(*smin,smlnum); + +/* Don't check for input errors */ + + *info = 0; + +/* Standard Initializations */ + + *scale = 1.f; + + if (*na == 1) { + +/* 1 x 1 (i.e., scalar) system C X = B */ + + if (*nw == 1) { + +/* + Real 1x1 system. 
+ + C = ca A - w D +*/ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + cnorm = dabs(csr); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (r__1 = b[b_dim1 + 1], dabs(r__1)); + if (cnorm < 1.f && bnorm > 1.f) { + if (bnorm > bignum * cnorm) { + *scale = 1.f / bnorm; + } + } + +/* Compute X */ + + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr; + *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)); + } else { + +/* + Complex 1x1 system (w is complex) + + C = ca A - w D +*/ + + csr = *ca * a[a_dim1 + 1] - *wr * *d1; + csi = -(*wi) * *d1; + cnorm = dabs(csr) + dabs(csi); + +/* If | C | < SMINI, use C = SMINI */ + + if (cnorm < smini) { + csr = smini; + csi = 0.f; + cnorm = smini; + *info = 1; + } + +/* Check scaling for X = B / C */ + + bnorm = (r__1 = b[b_dim1 + 1], dabs(r__1)) + (r__2 = b[(b_dim1 << + 1) + 1], dabs(r__2)); + if (cnorm < 1.f && bnorm > 1.f) { + if (bnorm > bignum * cnorm) { + *scale = 1.f / bnorm; + } + } + +/* Compute X */ + + r__1 = *scale * b[b_dim1 + 1]; + r__2 = *scale * b[(b_dim1 << 1) + 1]; + sladiv_(&r__1, &r__2, &csr, &csi, &x[x_dim1 + 1], &x[(x_dim1 << 1) + + 1]); + *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)) + (r__2 = x[(x_dim1 << + 1) + 1], dabs(r__2)); + } + + } else { + +/* + 2x2 System + + Compute the real part of C = ca A - w D (or ca A' - w D ) +*/ + + cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1; + cr[3] = *ca * a[(a_dim1 << 1) + 2] - *wr * *d2; + if (*ltrans) { + cr[2] = *ca * a[a_dim1 + 2]; + cr[1] = *ca * a[(a_dim1 << 1) + 1]; + } else { + cr[1] = *ca * a[a_dim1 + 2]; + cr[2] = *ca * a[(a_dim1 << 1) + 1]; + } + + if (*nw == 1) { + +/* + Real 2x2 system (w is real) + + Find the largest element in C +*/ + + cmax = 0.f; + icmax = 0; + + for (j = 1; j <= 4; ++j) { + if ((r__1 = crv[j - 1], dabs(r__1)) > cmax) { + cmax = (r__1 = crv[j - 1], dabs(r__1)); + icmax = j; + } +/* L10: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. 
*/ + + if (cmax < smini) { +/* Computing MAX */ + r__3 = (r__1 = b[b_dim1 + 1], dabs(r__1)), r__4 = (r__2 = b[ + b_dim1 + 2], dabs(r__2)); + bnorm = dmax(r__3,r__4); + if (smini < 1.f && bnorm > 1.f) { + if (bnorm > bignum * smini) { + *scale = 1.f / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. */ + + ur11 = crv[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ur11r = 1.f / ur11; + lr21 = ur11r * cr21; + ur22 = cr22 - ur12 * lr21; + +/* If smaller pivot < SMINI, use SMINI */ + + if (dabs(ur22) < smini) { + ur22 = smini; + *info = 1; + } + if (rswap[icmax - 1]) { + br1 = b[b_dim1 + 2]; + br2 = b[b_dim1 + 1]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + } + br2 -= lr21 * br1; +/* Computing MAX */ + r__2 = (r__1 = br1 * (ur22 * ur11r), dabs(r__1)), r__3 = dabs(br2) + ; + bbnd = dmax(r__2,r__3); + if (bbnd > 1.f && dabs(ur22) < 1.f) { + if (bbnd >= bignum * dabs(ur22)) { + *scale = 1.f / bbnd; + } + } + + xr2 = br2 * *scale / ur22; + xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12); + if (cswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + } +/* Computing MAX */ + r__1 = dabs(xr1), r__2 = dabs(xr2); + *xnorm = dmax(r__1,r__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1.f && cmax > 1.f) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } else { + +/* + Complex 2x2 system (w is complex) + + Find the largest element in C +*/ + + ci[0] = -(*wi) * *d1; + ci[1] = 0.f; + ci[2] = 0.f; + ci[3] = -(*wi) * *d2; + cmax = 0.f; + icmax = 0; + + 
for (j = 1; j <= 4; ++j) { + if ((r__1 = crv[j - 1], dabs(r__1)) + (r__2 = civ[j - 1], + dabs(r__2)) > cmax) { + cmax = (r__1 = crv[j - 1], dabs(r__1)) + (r__2 = civ[j - + 1], dabs(r__2)); + icmax = j; + } +/* L20: */ + } + +/* If norm(C) < SMINI, use SMINI*identity. */ + + if (cmax < smini) { +/* Computing MAX */ + r__5 = (r__1 = b[b_dim1 + 1], dabs(r__1)) + (r__2 = b[(b_dim1 + << 1) + 1], dabs(r__2)), r__6 = (r__3 = b[b_dim1 + 2], + dabs(r__3)) + (r__4 = b[(b_dim1 << 1) + 2], dabs( + r__4)); + bnorm = dmax(r__5,r__6); + if (smini < 1.f && bnorm > 1.f) { + if (bnorm > bignum * smini) { + *scale = 1.f / bnorm; + } + } + temp = *scale / smini; + x[x_dim1 + 1] = temp * b[b_dim1 + 1]; + x[x_dim1 + 2] = temp * b[b_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * b[(b_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * b[(b_dim1 << 1) + 2]; + *xnorm = temp * bnorm; + *info = 1; + return 0; + } + +/* Gaussian elimination with complete pivoting. */ + + ur11 = crv[icmax - 1]; + ui11 = civ[icmax - 1]; + cr21 = crv[ipivot[(icmax << 2) - 3] - 1]; + ci21 = civ[ipivot[(icmax << 2) - 3] - 1]; + ur12 = crv[ipivot[(icmax << 2) - 2] - 1]; + ui12 = civ[ipivot[(icmax << 2) - 2] - 1]; + cr22 = crv[ipivot[(icmax << 2) - 1] - 1]; + ci22 = civ[ipivot[(icmax << 2) - 1] - 1]; + if (icmax == 1 || icmax == 4) { + +/* Code when off-diagonals of pivoted C are real */ + + if (dabs(ur11) > dabs(ui11)) { + temp = ui11 / ur11; +/* Computing 2nd power */ + r__1 = temp; + ur11r = 1.f / (ur11 * (r__1 * r__1 + 1.f)); + ui11r = -temp * ur11r; + } else { + temp = ur11 / ui11; +/* Computing 2nd power */ + r__1 = temp; + ui11r = -1.f / (ui11 * (r__1 * r__1 + 1.f)); + ur11r = -temp * ui11r; + } + lr21 = cr21 * ur11r; + li21 = cr21 * ui11r; + ur12s = ur12 * ur11r; + ui12s = ur12 * ui11r; + ur22 = cr22 - ur12 * lr21; + ui22 = ci22 - ur12 * li21; + } else { + +/* Code when diagonals of pivoted C are real */ + + ur11r = 1.f / ur11; + ui11r = 0.f; + lr21 = cr21 * ur11r; + li21 = ci21 * ur11r; + ur12s = ur12 * ur11r; + 
ui12s = ui12 * ur11r; + ur22 = cr22 - ur12 * lr21 + ui12 * li21; + ui22 = -ur12 * li21 - ui12 * lr21; + } + u22abs = dabs(ur22) + dabs(ui22); + +/* If smaller pivot < SMINI, use SMINI */ + + if (u22abs < smini) { + ur22 = smini; + ui22 = 0.f; + *info = 1; + } + if (rswap[icmax - 1]) { + br2 = b[b_dim1 + 1]; + br1 = b[b_dim1 + 2]; + bi2 = b[(b_dim1 << 1) + 1]; + bi1 = b[(b_dim1 << 1) + 2]; + } else { + br1 = b[b_dim1 + 1]; + br2 = b[b_dim1 + 2]; + bi1 = b[(b_dim1 << 1) + 1]; + bi2 = b[(b_dim1 << 1) + 2]; + } + br2 = br2 - lr21 * br1 + li21 * bi1; + bi2 = bi2 - li21 * br1 - lr21 * bi1; +/* Computing MAX */ + r__1 = (dabs(br1) + dabs(bi1)) * (u22abs * (dabs(ur11r) + dabs( + ui11r))), r__2 = dabs(br2) + dabs(bi2); + bbnd = dmax(r__1,r__2); + if (bbnd > 1.f && u22abs < 1.f) { + if (bbnd >= bignum * u22abs) { + *scale = 1.f / bbnd; + br1 = *scale * br1; + bi1 = *scale * bi1; + br2 = *scale * br2; + bi2 = *scale * bi2; + } + } + + sladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2); + xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2; + xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2; + if (cswap[icmax - 1]) { + x[x_dim1 + 1] = xr2; + x[x_dim1 + 2] = xr1; + x[(x_dim1 << 1) + 1] = xi2; + x[(x_dim1 << 1) + 2] = xi1; + } else { + x[x_dim1 + 1] = xr1; + x[x_dim1 + 2] = xr2; + x[(x_dim1 << 1) + 1] = xi1; + x[(x_dim1 << 1) + 2] = xi2; + } +/* Computing MAX */ + r__1 = dabs(xr1) + dabs(xi1), r__2 = dabs(xr2) + dabs(xi2); + *xnorm = dmax(r__1,r__2); + +/* Further scaling if norm(A) norm(X) > overflow */ + + if (*xnorm > 1.f && cmax > 1.f) { + if (*xnorm > bignum / cmax) { + temp = cmax / bignum; + x[x_dim1 + 1] = temp * x[x_dim1 + 1]; + x[x_dim1 + 2] = temp * x[x_dim1 + 2]; + x[(x_dim1 << 1) + 1] = temp * x[(x_dim1 << 1) + 1]; + x[(x_dim1 << 1) + 2] = temp * x[(x_dim1 << 1) + 2]; + *xnorm = temp * *xnorm; + *scale = temp * *scale; + } + } + } + } + + return 0; + +/* End of SLALN2 */ + +} /* slaln2_ */ + +#undef crv +#undef civ +#undef cr +#undef ci + + +doublereal 
slamch_(char *cmach) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + real ret_val; + + /* Builtin functions */ + double pow_ri(real *, integer *); + + /* Local variables */ + static real t; + static integer it; + static real rnd, eps, base; + static integer beta; + static real emin, prec, emax; + static integer imin, imax; + static logical lrnd; + static real rmin, rmax, rmach; + extern logical lsame_(char *, char *); + static real small, sfmin; + extern /* Subroutine */ int slamc2_(integer *, integer *, logical *, real + *, integer *, real *, integer *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAMCH determines single precision machine parameters. + + Arguments + ========= + + CMACH (input) CHARACTER*1 + Specifies the value to be returned by SLAMCH: + = 'E' or 'e', SLAMCH := eps + = 'S' or 's , SLAMCH := sfmin + = 'B' or 'b', SLAMCH := base + = 'P' or 'p', SLAMCH := eps*base + = 'N' or 'n', SLAMCH := t + = 'R' or 'r', SLAMCH := rnd + = 'M' or 'm', SLAMCH := emin + = 'U' or 'u', SLAMCH := rmin + = 'L' or 'l', SLAMCH := emax + = 'O' or 'o', SLAMCH := rmax + + where + + eps = relative machine precision + sfmin = safe minimum, such that 1/sfmin does not overflow + base = base of the machine + prec = eps*base + t = number of (base) digits in the mantissa + rnd = 1.0 when rounding occurs in addition, 0.0 otherwise + emin = minimum exponent before (gradual) underflow + rmin = underflow threshold - base**(emin-1) + emax = largest exponent before overflow + rmax = overflow threshold - (base**emax)*(1-eps) + + ===================================================================== +*/ + + + if (first) { + first = FALSE_; + slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); + base = (real) beta; + t = (real) 
it; + if (lrnd) { + rnd = 1.f; + i__1 = 1 - it; + eps = pow_ri(&base, &i__1) / 2; + } else { + rnd = 0.f; + i__1 = 1 - it; + eps = pow_ri(&base, &i__1); + } + prec = eps * base; + emin = (real) imin; + emax = (real) imax; + sfmin = rmin; + small = 1.f / rmax; + if (small >= sfmin) { + +/* + Use SMALL plus a bit, to avoid the possibility of rounding + causing overflow when computing 1/sfmin. +*/ + + sfmin = small * (eps + 1.f); + } + } + + if (lsame_(cmach, "E")) { + rmach = eps; + } else if (lsame_(cmach, "S")) { + rmach = sfmin; + } else if (lsame_(cmach, "B")) { + rmach = base; + } else if (lsame_(cmach, "P")) { + rmach = prec; + } else if (lsame_(cmach, "N")) { + rmach = t; + } else if (lsame_(cmach, "R")) { + rmach = rnd; + } else if (lsame_(cmach, "M")) { + rmach = emin; + } else if (lsame_(cmach, "U")) { + rmach = rmin; + } else if (lsame_(cmach, "L")) { + rmach = emax; + } else if (lsame_(cmach, "O")) { + rmach = rmax; + } + + ret_val = rmach; + return ret_val; + +/* End of SLAMCH */ + +} /* slamch_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical + *ieee1) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + real r__1, r__2; + + /* Local variables */ + static real a, b, c__, f, t1, t2; + static integer lt; + static real one, qtr; + static logical lrnd; + static integer lbeta; + static real savec; + static logical lieee1; + extern doublereal slamc3_(real *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAMC1 determines the machine parameters given by BETA, T, RND, and + IEEE1. + + Arguments + ========= + + BETA (output) INTEGER + The base of the machine. 
+ + T (output) INTEGER + The number of ( BETA ) digits in the mantissa. + + RND (output) LOGICAL + Specifies whether proper rounding ( RND = .TRUE. ) or + chopping ( RND = .FALSE. ) occurs in addition. This may not + be a reliable guide to the way in which the machine performs + its arithmetic. + + IEEE1 (output) LOGICAL + Specifies whether rounding appears to be done in the IEEE + 'round to nearest' style. + + Further Details + =============== + + The routine is based on the routine ENVRON by Malcolm and + incorporates suggestions by Gentleman and Marovich. See + + Malcolm M. A. (1972) Algorithms to reveal properties of + floating-point arithmetic. Comms. of the ACM, 15, 949-951. + + Gentleman W. M. and Marovich S. B. (1974) More on algorithms + that reveal properties of floating point arithmetic units. + Comms. of the ACM, 17, 276-277. + + ===================================================================== +*/ + + + if (first) { + first = FALSE_; + one = 1.f; + +/* + LBETA, LIEEE1, LT and LRND are the local values of BETA, + IEEE1, T and RND. + + Throughout this routine we use the function SLAMC3 to ensure + that relevant values are stored and not held in registers, or + are not affected by optimizers. + + Compute a = 2.0**m with the smallest positive integer m such + that + + fl( a + 1.0 ) = a. +*/ + + a = 1.f; + c__ = 1.f; + +/* + WHILE( C.EQ.ONE )LOOP */ +L10: + if (c__ == one) { + a *= 2; + c__ = slamc3_(&a, &one); + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + goto L10; + } +/* + + END WHILE + + Now compute b = 2.0**m with the smallest positive integer m + such that + + fl( a + b ) .gt. a. +*/ + + b = 1.f; + c__ = slamc3_(&a, &b); + +/* + WHILE( C.EQ.A )LOOP */ +L20: + if (c__ == a) { + b *= 2; + c__ = slamc3_(&a, &b); + goto L20; + } +/* + + END WHILE + + Now compute the base. a and c are neighbouring floating point + numbers in the interval ( beta**t, beta**( t + 1 ) ) and so + their difference is beta. 
Adding 0.25 to c is to ensure that it + is truncated to beta and not ( beta - 1 ). +*/ + + qtr = one / 4; + savec = c__; + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + lbeta = c__ + qtr; + +/* + Now determine whether rounding or chopping occurs, by adding a + bit less than beta/2 and a bit more than beta/2 to a. +*/ + + b = (real) lbeta; + r__1 = b / 2; + r__2 = -b / 100; + f = slamc3_(&r__1, &r__2); + c__ = slamc3_(&f, &a); + if (c__ == a) { + lrnd = TRUE_; + } else { + lrnd = FALSE_; + } + r__1 = b / 2; + r__2 = b / 100; + f = slamc3_(&r__1, &r__2); + c__ = slamc3_(&f, &a); + if (lrnd && c__ == a) { + lrnd = FALSE_; + } + +/* + Try and decide whether rounding is done in the IEEE 'round to + nearest' style. B/2 is half a unit in the last place of the two + numbers A and SAVEC. Furthermore, A is even, i.e. has last bit + zero, and SAVEC is odd. Thus adding B/2 to A should not change + A, but adding B/2 to SAVEC should change SAVEC. +*/ + + r__1 = b / 2; + t1 = slamc3_(&r__1, &a); + r__1 = b / 2; + t2 = slamc3_(&r__1, &savec); + lieee1 = t1 == a && t2 > savec && lrnd; + +/* + Now find the mantissa, t. It should be the integer part of + log to the base beta of a, however it is safer to determine t + by powering. So we find t as the smallest positive integer for + which + + fl( beta**t + 1.0 ) = 1.0. 
+*/ + + lt = 0; + a = 1.f; + c__ = 1.f; + +/* + WHILE( C.EQ.ONE )LOOP */ +L30: + if (c__ == one) { + ++lt; + a *= lbeta; + c__ = slamc3_(&a, &one); + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + goto L30; + } +/* + END WHILE */ + + } + + *beta = lbeta; + *t = lt; + *rnd = lrnd; + *ieee1 = lieee1; + return 0; + +/* End of SLAMC1 */ + +} /* slamc1_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int slamc2_(integer *beta, integer *t, logical *rnd, real * + eps, integer *emin, real *rmin, integer *emax, real *rmax) +{ + /* Initialized data */ + + static logical first = TRUE_; + static logical iwarn = FALSE_; + + /* Format strings */ + static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre" + "ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va" + "lue EMIN looks\002,\002 acceptable please comment out \002,/\002" + " the IF block as marked within the code of routine\002,\002 SLAM" + "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)"; + + /* System generated locals */ + integer i__1; + real r__1, r__2, r__3, r__4, r__5; + + /* Builtin functions */ + double pow_ri(real *, integer *); + integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); + + /* Local variables */ + static real a, b, c__; + static integer i__, lt; + static real one, two; + static logical ieee; + static real half; + static logical lrnd; + static real leps, zero; + static integer lbeta; + static real rbase; + static integer lemin, lemax, gnmin; + static real small; + static integer gpmin; + static real third, lrmin, lrmax, sixth; + static logical lieee1; + extern /* Subroutine */ int slamc1_(integer *, integer *, logical *, + logical *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int slamc4_(integer *, real *, integer *), + slamc5_(integer *, integer *, integer *, logical *, integer *, + real *); + static integer ngnmin, ngpmin; + + /* Fortran I/O blocks */ + static 
cilist io___2878 = { 0, 6, 0, fmt_9999, 0 }; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAMC2 determines the machine parameters specified in its argument + list. + + Arguments + ========= + + BETA (output) INTEGER + The base of the machine. + + T (output) INTEGER + The number of ( BETA ) digits in the mantissa. + + RND (output) LOGICAL + Specifies whether proper rounding ( RND = .TRUE. ) or + chopping ( RND = .FALSE. ) occurs in addition. This may not + be a reliable guide to the way in which the machine performs + its arithmetic. + + EPS (output) REAL + The smallest positive number such that + + fl( 1.0 - EPS ) .LT. 1.0, + + where fl denotes the computed value. + + EMIN (output) INTEGER + The minimum exponent before (gradual) underflow occurs. + + RMIN (output) REAL + The smallest normalized number for the machine, given by + BASE**( EMIN - 1 ), where BASE is the floating point value + of BETA. + + EMAX (output) INTEGER + The maximum exponent before overflow occurs. + + RMAX (output) REAL + The largest positive number for the machine, given by + BASE**EMAX * ( 1 - EPS ), where BASE is the floating point + value of BETA. + + Further Details + =============== + + The computation of EPS is based on a routine PARANOIA by + W. Kahan of the University of California at Berkeley. + + ===================================================================== +*/ + + + if (first) { + first = FALSE_; + zero = 0.f; + one = 1.f; + two = 2.f; + +/* + LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of + BETA, T, RND, EPS, EMIN and RMIN. + + Throughout this routine we use the function SLAMC3 to ensure + that relevant values are stored and not held in registers, or + are not affected by optimizers. + + SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. 
+*/ + + slamc1_(&lbeta, <, &lrnd, &lieee1); + +/* Start to find EPS. */ + + b = (real) lbeta; + i__1 = -lt; + a = pow_ri(&b, &i__1); + leps = a; + +/* Try some tricks to see whether or not this is the correct EPS. */ + + b = two / 3; + half = one / 2; + r__1 = -half; + sixth = slamc3_(&b, &r__1); + third = slamc3_(&sixth, &sixth); + r__1 = -half; + b = slamc3_(&third, &r__1); + b = slamc3_(&b, &sixth); + b = dabs(b); + if (b < leps) { + b = leps; + } + + leps = 1.f; + +/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ +L10: + if (leps > b && b > zero) { + leps = b; + r__1 = half * leps; +/* Computing 5th power */ + r__3 = two, r__4 = r__3, r__3 *= r__3; +/* Computing 2nd power */ + r__5 = leps; + r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); + c__ = slamc3_(&r__1, &r__2); + r__1 = -c__; + c__ = slamc3_(&half, &r__1); + b = slamc3_(&half, &c__); + r__1 = -b; + c__ = slamc3_(&half, &r__1); + b = slamc3_(&half, &c__); + goto L10; + } +/* + END WHILE */ + + if (a < leps) { + leps = a; + } + +/* + Computation of EPS complete. + + Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). + Keep dividing A by BETA until (gradual) underflow occurs. This + is detected when we cannot recover the previous A. 
+*/ + + rbase = one / lbeta; + small = one; + for (i__ = 1; i__ <= 3; ++i__) { + r__1 = small * rbase; + small = slamc3_(&r__1, &zero); +/* L20: */ + } + a = slamc3_(&one, &small); + slamc4_(&ngpmin, &one, &lbeta); + r__1 = -one; + slamc4_(&ngnmin, &r__1, &lbeta); + slamc4_(&gpmin, &a, &lbeta); + r__1 = -a; + slamc4_(&gnmin, &r__1, &lbeta); + ieee = FALSE_; + + if (ngpmin == ngnmin && gpmin == gnmin) { + if (ngpmin == gpmin) { + lemin = ngpmin; +/* + ( Non twos-complement machines, no gradual underflow; + e.g., VAX ) +*/ + } else if (gpmin - ngpmin == 3) { + lemin = ngpmin - 1 + lt; + ieee = TRUE_; +/* + ( Non twos-complement machines, with gradual underflow; + e.g., IEEE standard followers ) +*/ + } else { + lemin = min(ngpmin,gpmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else if (ngpmin == gpmin && ngnmin == gnmin) { + if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { + lemin = max(ngpmin,ngnmin); +/* + ( Twos-complement machines, no gradual underflow; + e.g., CYBER 205 ) +*/ + } else { + lemin = min(ngpmin,ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) + { + if (gpmin - min(ngpmin,ngnmin) == 3) { + lemin = max(ngpmin,ngnmin) - 1 + lt; +/* + ( Twos-complement machines with gradual underflow; + no known machine ) +*/ + } else { + lemin = min(ngpmin,ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } else { +/* Computing MIN */ + i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); + lemin = min(i__1,gnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } +/* + ** + Comment out this if block if EMIN is ok +*/ + if (iwarn) { + first = TRUE_; + s_wsfe(&io___2878); + do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer)); + e_wsfe(); + } +/* + ** + + Assume IEEE arithmetic if we found denormalised numbers above, + or if arithmetic seems to round in the IEEE style, determined + in routine SLAMC1. 
A true IEEE machine should have both things + true; however, faulty machines may have one or the other. +*/ + + ieee = ieee || lieee1; + +/* + Compute RMIN by successive division by BETA. We could compute + RMIN as BASE**( EMIN - 1 ), but some machines underflow during + this computation. +*/ + + lrmin = 1.f; + i__1 = 1 - lemin; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = lrmin * rbase; + lrmin = slamc3_(&r__1, &zero); +/* L30: */ + } + +/* Finally, call SLAMC5 to compute EMAX and RMAX. */ + + slamc5_(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); + } + + *beta = lbeta; + *t = lt; + *rnd = lrnd; + *eps = leps; + *emin = lemin; + *rmin = lrmin; + *emax = lemax; + *rmax = lrmax; + + return 0; + + +/* End of SLAMC2 */ + +} /* slamc2_ */ + + +/* *********************************************************************** */ + +doublereal slamc3_(real *a, real *b) +{ + /* System generated locals */ + real ret_val; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAMC3 is intended to force A and B to be stored prior to doing + the addition of A and B , for use in situations where optimizers + might hold one of these in a register. + + Arguments + ========= + + A, B (input) REAL + The values A and B. 
+ + ===================================================================== +*/ + + + ret_val = *a + *b; + + return ret_val; + +/* End of SLAMC3 */ + +} /* slamc3_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int slamc4_(integer *emin, real *start, integer *base) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Local variables */ + static real a; + static integer i__; + static real b1, b2, c1, c2, d1, d2, one, zero, rbase; + extern doublereal slamc3_(real *, real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAMC4 is a service routine for SLAMC2. + + Arguments + ========= + + EMIN (output) EMIN + The minimum exponent before (gradual) underflow, computed by + setting A = START and dividing by BASE until the previous A + can not be recovered. + + START (input) REAL + The starting point for determining EMIN. + + BASE (input) INTEGER + The base of the machine. + + ===================================================================== +*/ + + + a = *start; + one = 1.f; + rbase = one / *base; + zero = 0.f; + *emin = 1; + r__1 = a * rbase; + b1 = slamc3_(&r__1, &zero); + c1 = a; + c2 = a; + d1 = a; + d2 = a; +/* + + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. 
+ $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP +*/ +L10: + if (c1 == a && c2 == a && d1 == a && d2 == a) { + --(*emin); + a = b1; + r__1 = a / *base; + b1 = slamc3_(&r__1, &zero); + r__1 = b1 * *base; + c1 = slamc3_(&r__1, &zero); + d1 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d1 += b1; +/* L20: */ + } + r__1 = a * rbase; + b2 = slamc3_(&r__1, &zero); + r__1 = b2 / rbase; + c2 = slamc3_(&r__1, &zero); + d2 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d2 += b2; +/* L30: */ + } + goto L10; + } +/* + END WHILE */ + + return 0; + +/* End of SLAMC4 */ + +} /* slamc4_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int slamc5_(integer *beta, integer *p, integer *emin, + logical *ieee, integer *emax, real *rmax) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Local variables */ + static integer i__; + static real y, z__; + static integer try__, lexp; + static real oldy; + static integer uexp, nbits; + extern doublereal slamc3_(real *, real *); + static real recbas; + static integer exbits, expsum; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAMC5 attempts to compute RMAX, the largest machine floating-point + number, without overflow. It assumes that EMAX + abs(EMIN) sum + approximately to a power of 2. It will fail on machines where this + assumption does not hold, for example, the Cyber 205 (EMIN = -28625, + EMAX = 28718). It will also fail if the value supplied for EMIN is + too large (i.e. too close to zero), probably with overflow. + + Arguments + ========= + + BETA (input) INTEGER + The base of floating-point arithmetic. + + P (input) INTEGER + The number of base BETA digits in the mantissa of a + floating-point value. 
+ + EMIN (input) INTEGER + The minimum exponent before (gradual) underflow. + + IEEE (input) LOGICAL + A logical flag specifying whether or not the arithmetic + system is thought to comply with the IEEE standard. + + EMAX (output) INTEGER + The largest exponent before overflow + + RMAX (output) REAL + The largest machine floating-point number. + + ===================================================================== + + + First compute LEXP and UEXP, two powers of 2 that bound + abs(EMIN). We then assume that EMAX + abs(EMIN) will sum + approximately to the bound that is closest to abs(EMIN). + (EMAX is the exponent of the required number RMAX). +*/ + + lexp = 1; + exbits = 1; +L10: + try__ = lexp << 1; + if (try__ <= -(*emin)) { + lexp = try__; + ++exbits; + goto L10; + } + if (lexp == -(*emin)) { + uexp = lexp; + } else { + uexp = try__; + ++exbits; + } + +/* + Now -LEXP is less than or equal to EMIN, and -UEXP is greater + than or equal to EMIN. EXBITS is the number of bits needed to + store the exponent. +*/ + + if (uexp + *emin > -lexp - *emin) { + expsum = lexp << 1; + } else { + expsum = uexp << 1; + } + +/* + EXPSUM is the exponent range, approximately equal to + EMAX - EMIN + 1 . +*/ + + *emax = expsum + *emin - 1; + nbits = exbits + 1 + *p; + +/* + NBITS is the total number of bits needed to store a + floating-point number. +*/ + + if (nbits % 2 == 1 && *beta == 2) { + +/* + Either there are an odd number of bits used to store a + floating-point number, which is unlikely, or some bits are + not used in the representation of numbers, which is possible, + (e.g. Cray machines) or the mantissa has an implicit bit, + (e.g. IEEE machines, Dec Vax machines), which is perhaps the + most likely. We have to assume the last alternative. + If this is true, then we need to reduce EMAX by one because + there must be some way of representing zero in an implicit-bit + system. On machines like Cray, we are reducing EMAX by one + unnecessarily. 
+*/ + + --(*emax); + } + + if (*ieee) { + +/* + Assume we are on an IEEE machine which reserves one exponent + for infinity and NaN. +*/ + + --(*emax); + } + +/* + Now create RMAX, the largest machine number, which should + be equal to (1.0 - BETA**(-P)) * BETA**EMAX . + + First compute 1.0 - BETA**(-P), being careful that the + result is less than 1.0 . +*/ + + recbas = 1.f / *beta; + z__ = *beta - 1.f; + y = 0.f; + i__1 = *p; + for (i__ = 1; i__ <= i__1; ++i__) { + z__ *= recbas; + if (y < 1.f) { + oldy = y; + } + y = slamc3_(&y, &z__); +/* L20: */ + } + if (y >= 1.f) { + y = oldy; + } + +/* Now multiply by BETA**EMAX to get RMAX. */ + + i__1 = *emax; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = y * *beta; + y = slamc3_(&r__1, &c_b1101); +/* L30: */ + } + + *rmax = y; + return 0; + +/* End of SLAMC5 */ + +} /* slamc5_ */ + +/* Subroutine */ int slamrg_(integer *n1, integer *n2, real *a, integer * + strd1, integer *strd2, integer *index) +{ + /* System generated locals */ + integer i__1; + + /* Local variables */ + static integer i__, ind1, ind2, n1sv, n2sv; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + SLAMRG will create a permutation list which will merge the elements + of A (which is composed of two independently sorted sets) into a + single set which is sorted in ascending order. + + Arguments + ========= + + N1 (input) INTEGER + N2 (input) INTEGER + These arguements contain the respective lengths of the two + sorted lists to be merged. + + A (input) REAL array, dimension (N1+N2) + The first N1 elements of A contain a list of numbers which + are sorted in either ascending or descending order. Likewise + for the final N2 elements. + + STRD1 (input) INTEGER + STRD2 (input) INTEGER + These are the strides to be taken through the array A. + Allowable strides are 1 and -1. 
They indicate whether a + subset of A is sorted in ascending (STRDx = 1) or descending + (STRDx = -1) order. + + INDEX (output) INTEGER array, dimension (N1+N2) + On exit this array will contain a permutation such that + if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be + sorted in ascending order. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --index; + --a; + + /* Function Body */ + n1sv = *n1; + n2sv = *n2; + if (*strd1 > 0) { + ind1 = 1; + } else { + ind1 = *n1; + } + if (*strd2 > 0) { + ind2 = *n1 + 1; + } else { + ind2 = *n1 + *n2; + } + i__ = 1; +/* while ( (N1SV > 0) & (N2SV > 0) ) */ +L10: + if (n1sv > 0 && n2sv > 0) { + if (a[ind1] <= a[ind2]) { + index[i__] = ind1; + ++i__; + ind1 += *strd1; + --n1sv; + } else { + index[i__] = ind2; + ++i__; + ind2 += *strd2; + --n2sv; + } + goto L10; + } +/* end while */ + if (n1sv == 0) { + i__1 = n2sv; + for (n1sv = 1; n1sv <= i__1; ++n1sv) { + index[i__] = ind2; + ++i__; + ind2 += *strd2; +/* L20: */ + } + } else { +/* N2SV .EQ. 0 */ + i__1 = n1sv; + for (n2sv = 1; n2sv <= i__1; ++n2sv) { + index[i__] = ind1; + ++i__; + ind1 += *strd1; +/* L30: */ + } + } + + return 0; + +/* End of SLAMRG */ + +} /* slamrg_ */ + +doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, + real *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real ret_val, r__1, r__2, r__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static real sum, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLANGE returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real matrix A. + + Description + =========== + + SLANGE returns the value + + SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in SLANGE as described + above. + + M (input) INTEGER + The number of rows of the matrix A. M >= 0. When M = 0, + SLANGE is set to zero. + + N (input) INTEGER + The number of columns of the matrix A. N >= 0. When N = 0, + SLANGE is set to zero. + + A (input) REAL array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(M,1). + + WORK (workspace) REAL array, dimension (LWORK), + where LWORK >= M when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (min(*m,*n) == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). 
*/ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + value = dmax(r__2,r__3); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); +/* L30: */ + } + value = dmax(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.f; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + slassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of SLANGE */ + +} /* slange_ */ + +doublereal slanhs_(char *norm, integer *n, real *a, integer *lda, real *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + real ret_val, r__1, r__2, r__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static real sum, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLANHS returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + Hessenberg matrix A. + + Description + =========== + + SLANHS returns the value + + SLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in SLANHS as described + above. + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, SLANHS is + set to zero. + + A (input) REAL array, dimension (LDA,N) + The n by n upper Hessenberg matrix A; the part of A below the + first sub-diagonal is not referenced. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) REAL array, dimension (LWORK), + where LWORK >= N when NORM = 'I'; otherwise, WORK is not + referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). 
*/ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + value = dmax(r__2,r__3); +/* L10: */ + } +/* L20: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1') { + +/* Find norm1(A). */ + + value = 0.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + sum += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); +/* L30: */ + } + value = dmax(value,sum); +/* L40: */ + } + } else if (lsame_(norm, "I")) { + +/* Find normI(A). */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L50: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + for (i__ = 1; i__ <= i__2; ++i__) { + work[i__] += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); +/* L60: */ + } +/* L70: */ + } + value = 0.f; + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L80: */ + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). 
*/ + + scale = 0.f; + sum = 1.f; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { +/* Computing MIN */ + i__3 = *n, i__4 = j + 1; + i__2 = min(i__3,i__4); + slassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L90: */ + } + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of SLANHS */ + +} /* slanhs_ */ + +doublereal slanst_(char *norm, integer *n, real *d__, real *e) +{ + /* System generated locals */ + integer i__1; + real ret_val, r__1, r__2, r__3, r__4, r__5; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__; + static real sum, scale; + extern logical lsame_(char *, char *); + static real anorm; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLANST returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real symmetric tridiagonal matrix A. + + Description + =========== + + SLANST returns the value + + SLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in SLANST as described + above. + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, SLANST is + set to zero. + + D (input) REAL array, dimension (N) + The diagonal elements of A. 
+ + E (input) REAL array, dimension (N-1) + The (n-1) sub-diagonal or super-diagonal elements of A. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --e; + --d__; + + /* Function Body */ + if (*n <= 0) { + anorm = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + anorm = (r__1 = d__[*n], dabs(r__1)); + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__2 = anorm, r__3 = (r__1 = d__[i__], dabs(r__1)); + anorm = dmax(r__2,r__3); +/* Computing MAX */ + r__2 = anorm, r__3 = (r__1 = e[i__], dabs(r__1)); + anorm = dmax(r__2,r__3); +/* L10: */ + } + } else if (lsame_(norm, "O") || *(unsigned char *) + norm == '1' || lsame_(norm, "I")) { + +/* Find norm1(A). */ + + if (*n == 1) { + anorm = dabs(d__[1]); + } else { +/* Computing MAX */ + r__3 = dabs(d__[1]) + dabs(e[1]), r__4 = (r__1 = e[*n - 1], dabs( + r__1)) + (r__2 = d__[*n], dabs(r__2)); + anorm = dmax(r__3,r__4); + i__1 = *n - 1; + for (i__ = 2; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__4 = anorm, r__5 = (r__1 = d__[i__], dabs(r__1)) + (r__2 = + e[i__], dabs(r__2)) + (r__3 = e[i__ - 1], dabs(r__3)); + anorm = dmax(r__4,r__5); +/* L20: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). 
*/ + + scale = 0.f; + sum = 1.f; + if (*n > 1) { + i__1 = *n - 1; + slassq_(&i__1, &e[1], &c__1, &scale, &sum); + sum *= 2; + } + slassq_(n, &d__[1], &c__1, &scale, &sum); + anorm = scale * sqrt(sum); + } + + ret_val = anorm; + return ret_val; + +/* End of SLANST */ + +} /* slanst_ */ + +doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, + real *work) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + real ret_val, r__1, r__2, r__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j; + static real sum, absa, scale; + extern logical lsame_(char *, char *); + static real value; + extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, + real *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLANSY returns the value of the one norm, or the Frobenius norm, or + the infinity norm, or the element of largest absolute value of a + real symmetric matrix A. + + Description + =========== + + SLANSY returns the value + + SLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' + ( + ( norm1(A), NORM = '1', 'O' or 'o' + ( + ( normI(A), NORM = 'I' or 'i' + ( + ( normF(A), NORM = 'F', 'f', 'E' or 'e' + + where norm1 denotes the one norm of a matrix (maximum column sum), + normI denotes the infinity norm of a matrix (maximum row sum) and + normF denotes the Frobenius norm of a matrix (square root of sum of + squares). Note that max(abs(A(i,j))) is not a matrix norm. + + Arguments + ========= + + NORM (input) CHARACTER*1 + Specifies the value to be returned in SLANSY as described + above. + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is to be referenced. 
+ = 'U': Upper triangular part of A is referenced + = 'L': Lower triangular part of A is referenced + + N (input) INTEGER + The order of the matrix A. N >= 0. When N = 0, SLANSY is + set to zero. + + A (input) REAL array, dimension (LDA,N) + The symmetric matrix A. If UPLO = 'U', the leading n by n + upper triangular part of A contains the upper triangular part + of the matrix A, and the strictly lower triangular part of A + is not referenced. If UPLO = 'L', the leading n by n lower + triangular part of A contains the lower triangular part of + the matrix A, and the strictly upper triangular part of A is + not referenced. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(N,1). + + WORK (workspace) REAL array, dimension (LWORK), + where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, + WORK is not referenced. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --work; + + /* Function Body */ + if (*n == 0) { + value = 0.f; + } else if (lsame_(norm, "M")) { + +/* Find max(abs(A(i,j))). */ + + value = 0.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs( + r__1)); + value = dmax(r__2,r__3); +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { +/* Computing MAX */ + r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs( + r__1)); + value = dmax(r__2,r__3); +/* L30: */ + } +/* L40: */ + } + } + } else if (lsame_(norm, "I") || lsame_(norm, "O") || *(unsigned char *)norm == '1') { + +/* Find normI(A) ( = norm1(A), since A is symmetric). 
*/ + + value = 0.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = 0.f; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + absa = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + sum += absa; + work[i__] += absa; +/* L50: */ + } + work[j] = sum + (r__1 = a[j + j * a_dim1], dabs(r__1)); +/* L60: */ + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { +/* Computing MAX */ + r__1 = value, r__2 = work[i__]; + value = dmax(r__1,r__2); +/* L70: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + work[i__] = 0.f; +/* L80: */ + } + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = work[j] + (r__1 = a[j + j * a_dim1], dabs(r__1)); + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + absa = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); + sum += absa; + work[i__] += absa; +/* L90: */ + } + value = dmax(value,sum); +/* L100: */ + } + } + } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + +/* Find normF(A). */ + + scale = 0.f; + sum = 1.f; + if (lsame_(uplo, "U")) { + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + slassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); +/* L110: */ + } + } else { + i__1 = *n - 1; + for (j = 1; j <= i__1; ++j) { + i__2 = *n - j; + slassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum); +/* L120: */ + } + } + sum *= 2; + i__1 = *lda + 1; + slassq_(n, &a[a_offset], &i__1, &scale, &sum); + value = scale * sqrt(sum); + } + + ret_val = value; + return ret_val; + +/* End of SLANSY */ + +} /* slansy_ */ + +/* Subroutine */ int slanv2_(real *a, real *b, real *c__, real *d__, real * + rt1r, real *rt1i, real *rt2r, real *rt2i, real *cs, real *sn) +{ + /* System generated locals */ + real r__1, r__2; + + /* Builtin functions */ + double r_sign(real *, real *), sqrt(doublereal); + + /* Local variables */ + static real p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, temp, + scale, bcmax, bcmis, sigma; + extern doublereal slapy2_(real *, real *), slamch_(char *); + + +/* + -- 
LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + SLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric + matrix in standard form: + + [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] + [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] + + where either + 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or + 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex + conjugate eigenvalues. + + Arguments + ========= + + A (input/output) REAL + B (input/output) REAL + C (input/output) REAL + D (input/output) REAL + On entry, the elements of the input matrix. + On exit, they are overwritten by the elements of the + standardised Schur form. + + RT1R (output) REAL + RT1I (output) REAL + RT2R (output) REAL + RT2I (output) REAL + The real and imaginary parts of the eigenvalues. If the + eigenvalues are a complex conjugate pair, RT1I > 0. + + CS (output) REAL + SN (output) REAL + Parameters of the rotation matrix. + + Further Details + =============== + + Modified by V. Sima, Research Institute for Informatics, Bucharest, + Romania, to reduce the risk of cancellation errors, + when computing real eigenvalues, and to ensure, if possible, that + abs(RT1R) >= abs(RT2R). 
+ + ===================================================================== +*/ + + + eps = slamch_("P"); + if (*c__ == 0.f) { + *cs = 1.f; + *sn = 0.f; + goto L10; + + } else if (*b == 0.f) { + +/* Swap rows and columns */ + + *cs = 0.f; + *sn = 1.f; + temp = *d__; + *d__ = *a; + *a = temp; + *b = -(*c__); + *c__ = 0.f; + goto L10; + } else if (*a - *d__ == 0.f && r_sign(&c_b871, b) != r_sign(&c_b871, c__)) + { + *cs = 1.f; + *sn = 0.f; + goto L10; + } else { + + temp = *a - *d__; + p = temp * .5f; +/* Computing MAX */ + r__1 = dabs(*b), r__2 = dabs(*c__); + bcmax = dmax(r__1,r__2); +/* Computing MIN */ + r__1 = dabs(*b), r__2 = dabs(*c__); + bcmis = dmin(r__1,r__2) * r_sign(&c_b871, b) * r_sign(&c_b871, c__); +/* Computing MAX */ + r__1 = dabs(p); + scale = dmax(r__1,bcmax); + z__ = p / scale * p + bcmax / scale * bcmis; + +/* + If Z is of the order of the machine accuracy, postpone the + decision on the nature of eigenvalues +*/ + + if (z__ >= eps * 4.f) { + +/* Real eigenvalues. Compute A and D. */ + + r__1 = sqrt(scale) * sqrt(z__); + z__ = p + r_sign(&r__1, &p); + *a = *d__ + z__; + *d__ -= bcmax / z__ * bcmis; + +/* Compute B and the rotation matrix */ + + tau = slapy2_(c__, &z__); + *cs = z__ / tau; + *sn = *c__ / tau; + *b -= *c__; + *c__ = 0.f; + } else { + +/* + Complex eigenvalues, or real (almost) equal eigenvalues. + Make diagonal elements equal. 
+*/ + + sigma = *b + *c__; + tau = slapy2_(&sigma, &temp); + *cs = sqrt((dabs(sigma) / tau + 1.f) * .5f); + *sn = -(p / (tau * *cs)) * r_sign(&c_b871, &sigma); + +/* + Compute [ AA BB ] = [ A B ] [ CS -SN ] + [ CC DD ] [ C D ] [ SN CS ] +*/ + + aa = *a * *cs + *b * *sn; + bb = -(*a) * *sn + *b * *cs; + cc = *c__ * *cs + *d__ * *sn; + dd = -(*c__) * *sn + *d__ * *cs; + +/* + Compute [ A B ] = [ CS SN ] [ AA BB ] + [ C D ] [-SN CS ] [ CC DD ] +*/ + + *a = aa * *cs + cc * *sn; + *b = bb * *cs + dd * *sn; + *c__ = -aa * *sn + cc * *cs; + *d__ = -bb * *sn + dd * *cs; + + temp = (*a + *d__) * .5f; + *a = temp; + *d__ = temp; + + if (*c__ != 0.f) { + if (*b != 0.f) { + if (r_sign(&c_b871, b) == r_sign(&c_b871, c__)) { + +/* Real eigenvalues: reduce to upper triangular form */ + + sab = sqrt((dabs(*b))); + sac = sqrt((dabs(*c__))); + r__1 = sab * sac; + p = r_sign(&r__1, c__); + tau = 1.f / sqrt((r__1 = *b + *c__, dabs(r__1))); + *a = temp + p; + *d__ = temp - p; + *b -= *c__; + *c__ = 0.f; + cs1 = sab * tau; + sn1 = sac * tau; + temp = *cs * cs1 - *sn * sn1; + *sn = *cs * sn1 + *sn * cs1; + *cs = temp; + } + } else { + *b = -(*c__); + *c__ = 0.f; + temp = *cs; + *cs = -(*sn); + *sn = temp; + } + } + } + + } + +L10: + +/* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */ + + *rt1r = *a; + *rt2r = *d__; + if (*c__ == 0.f) { + *rt1i = 0.f; + *rt2i = 0.f; + } else { + *rt1i = sqrt((dabs(*b))) * sqrt((dabs(*c__))); + *rt2i = -(*rt1i); + } + return 0; + +/* End of SLANV2 */ + +} /* slanv2_ */ + +doublereal slapy2_(real *x, real *y) +{ + /* System generated locals */ + real ret_val, r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real w, z__, xabs, yabs; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary + overflow. + + Arguments + ========= + + X (input) REAL + Y (input) REAL + X and Y specify the values x and y. + + ===================================================================== +*/ + + + xabs = dabs(*x); + yabs = dabs(*y); + w = dmax(xabs,yabs); + z__ = dmin(xabs,yabs); + if (z__ == 0.f) { + ret_val = w; + } else { +/* Computing 2nd power */ + r__1 = z__ / w; + ret_val = w * sqrt(r__1 * r__1 + 1.f); + } + return ret_val; + +/* End of SLAPY2 */ + +} /* slapy2_ */ + +doublereal slapy3_(real *x, real *y, real *z__) +{ + /* System generated locals */ + real ret_val, r__1, r__2, r__3; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static real w, xabs, yabs, zabs; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + October 31, 1992 + + + Purpose + ======= + + SLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause + unnecessary overflow. + + Arguments + ========= + + X (input) REAL + Y (input) REAL + Z (input) REAL + X, Y and Z specify the values x, y and z. 
+ + ===================================================================== +*/ + + + xabs = dabs(*x); + yabs = dabs(*y); + zabs = dabs(*z__); +/* Computing MAX */ + r__1 = max(xabs,yabs); + w = dmax(r__1,zabs); + if (w == 0.f) { + ret_val = 0.f; + } else { +/* Computing 2nd power */ + r__1 = xabs / w; +/* Computing 2nd power */ + r__2 = yabs / w; +/* Computing 2nd power */ + r__3 = zabs / w; + ret_val = w * sqrt(r__1 * r__1 + r__2 * r__2 + r__3 * r__3); + } + return ret_val; + +/* End of SLAPY3 */ + +} /* slapy3_ */ + +/* Subroutine */ int slarf_(char *side, integer *m, integer *n, real *v, + integer *incv, real *tau, real *c__, integer *ldc, real *work) +{ + /* System generated locals */ + integer c_dim1, c_offset; + real r__1; + + /* Local variables */ + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLARF applies a real elementary reflector H to a real m by n matrix + C, from either the left or the right. H is represented in the form + + H = I - tau * v * v' + + where tau is a real scalar and v is a real vector. + + If tau = 0, then H is taken to be the unit matrix. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) REAL array, dimension + (1 + (M-1)*abs(INCV)) if SIDE = 'L' + or (1 + (N-1)*abs(INCV)) if SIDE = 'R' + The vector v in the representation of H. V is not used if + TAU = 0. 
+ + INCV (input) INTEGER + The increment between elements of v. INCV <> 0. + + TAU (input) REAL + The value tau in the representation of H. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace) REAL array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (lsame_(side, "L")) { + +/* Form H * C */ + + if (*tau != 0.f) { + +/* w := C' * v */ + + sgemv_("Transpose", m, n, &c_b871, &c__[c_offset], ldc, &v[1], + incv, &c_b1101, &work[1], &c__1); + +/* C := C - v * w' */ + + r__1 = -(*tau); + sger_(m, n, &r__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], + ldc); + } + } else { + +/* Form C * H */ + + if (*tau != 0.f) { + +/* w := C * v */ + + sgemv_("No transpose", m, n, &c_b871, &c__[c_offset], ldc, &v[1], + incv, &c_b1101, &work[1], &c__1); + +/* C := C - w * v' */ + + r__1 = -(*tau); + sger_(m, n, &r__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], + ldc); + } + } + return 0; + +/* End of SLARF */ + +} /* slarf_ */ + +/* Subroutine */ int slarfb_(char *side, char *trans, char *direct, char * + storev, integer *m, integer *n, integer *k, real *v, integer *ldv, + real *t, integer *ldt, real *c__, integer *ldc, real *work, integer * + ldwork) +{ + /* System generated locals */ + integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, + work_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *), 
scopy_(integer *, real *, + integer *, real *, integer *), strmm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, + integer *); + static char transt[1]; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLARFB applies a real block reflector H or its transpose H' to a + real m by n matrix C, from either the left or the right. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply H or H' from the Left + = 'R': apply H or H' from the Right + + TRANS (input) CHARACTER*1 + = 'N': apply H (No transpose) + = 'T': apply H' (Transpose) + + DIRECT (input) CHARACTER*1 + Indicates how H is formed from a product of elementary + reflectors + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Indicates how the vectors which define the elementary + reflectors are stored: + = 'C': Columnwise + = 'R': Rowwise + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + K (input) INTEGER + The order of the matrix T (= the number of elementary + reflectors whose product defines the block reflector). + + V (input) REAL array, dimension + (LDV,K) if STOREV = 'C' + (LDV,M) if STOREV = 'R' and SIDE = 'L' + (LDV,N) if STOREV = 'R' and SIDE = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); + if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); + if STOREV = 'R', LDV >= K. + + T (input) REAL array, dimension (LDT,K) + The triangular k by k matrix T in the representation of the + block reflector. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. 
+ + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by H*C or H'*C or C*H or C*H'. + + LDC (input) INTEGER + The leading dimension of the array C. LDA >= max(1,M). + + WORK (workspace) REAL array, dimension (LDWORK,K) + + LDWORK (input) INTEGER + The leading dimension of the array WORK. + If SIDE = 'L', LDWORK >= max(1,N); + if SIDE = 'R', LDWORK >= max(1,M). + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + work_dim1 = *ldwork; + work_offset = 1 + work_dim1; + work -= work_offset; + + /* Function Body */ + if (*m <= 0 || *n <= 0) { + return 0; + } + + if (lsame_(trans, "N")) { + *(unsigned char *)transt = 'T'; + } else { + *(unsigned char *)transt = 'N'; + } + + if (lsame_(storev, "C")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 ) (first K rows) + ( V2 ) + where V1 is unit lower triangular. 
+*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); +/* L10: */ + } + +/* W := W * V1 */ + + strmm_("Right", "Lower", "No transpose", "Unit", n, k, & + c_b871, &v[v_offset], ldv, &work[work_offset], ldwork); + if (*m > *k) { + +/* W := W + C2'*V2 */ + + i__1 = *m - *k; + sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b871, + &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], + ldv, &c_b871, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b871, & + t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2 * W' */ + + i__1 = *m - *k; + sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b1150, + &v[*k + 1 + v_dim1], ldv, &work[work_offset], + ldwork, &c_b871, &c__[*k + 1 + c_dim1], ldc); + } + +/* W := W * V1' */ + + strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b871, & + v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L20: */ + } +/* L30: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L40: */ + } + +/* W := W * V1 */ + + strmm_("Right", "Lower", "No transpose", "Unit", m, k, & + c_b871, &v[v_offset], ldv, &work[work_offset], ldwork); + if (*n > *k) { + +/* W := W + C2 * V2 */ + + i__1 = *n - *k; + sgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b871, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + 
+ 1 + v_dim1], ldv, &c_b871, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b871, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (*n > *k) { + +/* C2 := C2 - W * V2' */ + + i__1 = *n - *k; + sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b1150, + &work[work_offset], ldwork, &v[*k + 1 + v_dim1], + ldv, &c_b871, &c__[(*k + 1) * c_dim1 + 1], ldc); + } + +/* W := W * V1' */ + + strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b871, & + v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L50: */ + } +/* L60: */ + } + } + + } else { + +/* + Let V = ( V1 ) + ( V2 ) (last K rows) + where V2 is unit upper triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); +/* L70: */ + } + +/* W := W * V2 */ + + strmm_("Right", "Upper", "No transpose", "Unit", n, k, & + c_b871, &v[*m - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (*m > *k) { + +/* W := W + C1'*V1 */ + + i__1 = *m - *k; + sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b871, + &c__[c_offset], ldc, &v[v_offset], ldv, &c_b871, & + work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b871, & + t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V * W' */ + + if (*m > *k) { + +/* C1 := C1 - V1 * W' */ + + i__1 = *m - *k; + sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b1150, + &v[v_offset], ldv, &work[work_offset], ldwork, & + c_b871, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + 
strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b871, & + v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L80: */ + } +/* L90: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V = (C1*V1 + C2*V2) (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L100: */ + } + +/* W := W * V2 */ + + strmm_("Right", "Upper", "No transpose", "Unit", m, k, & + c_b871, &v[*n - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (*n > *k) { + +/* W := W + C1 * V1 */ + + i__1 = *n - *k; + sgemm_("No transpose", "No transpose", m, k, &i__1, & + c_b871, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b871, &work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b871, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V' */ + + if (*n > *k) { + +/* C1 := C1 - W * V1' */ + + i__1 = *n - *k; + sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b1150, + &work[work_offset], ldwork, &v[v_offset], ldv, & + c_b871, &c__[c_offset], ldc); + } + +/* W := W * V2' */ + + strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b871, & + v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], + ldwork); + +/* C2 := C2 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L110: */ + } +/* L120: */ + } + } + } + + } else if (lsame_(storev, "R")) { + + if (lsame_(direct, "F")) { + +/* + Let V = ( V1 V2 ) (V1: first K columns) + where V1 is unit upper triangular. 
+*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C1' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], + &c__1); +/* L130: */ + } + +/* W := W * V1' */ + + strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b871, & + v[v_offset], ldv, &work[work_offset], ldwork); + if (*m > *k) { + +/* W := W + C2'*V2' */ + + i__1 = *m - *k; + sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b871, & + c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b871, &work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b871, & + t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (*m > *k) { + +/* C2 := C2 - V2' * W' */ + + i__1 = *m - *k; + sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b1150, & + v[(*k + 1) * v_dim1 + 1], ldv, &work[work_offset], + ldwork, &c_b871, &c__[*k + 1 + c_dim1], ldc); + } + +/* W := W * V1 */ + + strmm_("Right", "Upper", "No transpose", "Unit", n, k, & + c_b871, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; +/* L140: */ + } +/* L150: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C1 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + work_dim1 + 1], &c__1); +/* L160: */ + } + +/* W := W * V1' */ + + strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b871, & + v[v_offset], ldv, &work[work_offset], ldwork); + if (*n > *k) { + +/* W := W + C2 * V2' */ + + i__1 = *n - *k; + sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b871, + &c__[(*k + 1) * c_dim1 + 
1], ldc, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b871, &work[work_offset], + ldwork); + } + +/* W := W * T or W * T' */ + + strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b871, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C2 := C2 - W * V2 */ + + i__1 = *n - *k; + sgemm_("No transpose", "No transpose", m, &i__1, k, & + c_b1150, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b871, &c__[(*k + 1) * + c_dim1 + 1], ldc); + } + +/* W := W * V1 */ + + strmm_("Right", "Upper", "No transpose", "Unit", m, k, & + c_b871, &v[v_offset], ldv, &work[work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; +/* L170: */ + } +/* L180: */ + } + + } + + } else { + +/* + Let V = ( V1 V2 ) (V2: last K columns) + where V2 is unit lower triangular. +*/ + + if (lsame_(side, "L")) { + +/* + Form H * C or H' * C where C = ( C1 ) + ( C2 ) + + W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) + + W := C2' +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * + work_dim1 + 1], &c__1); +/* L190: */ + } + +/* W := W * V2' */ + + strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b871, & + v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] + , ldwork); + if (*m > *k) { + +/* W := W + C1'*V1' */ + + i__1 = *m - *k; + sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b871, & + c__[c_offset], ldc, &v[v_offset], ldv, &c_b871, & + work[work_offset], ldwork); + } + +/* W := W * T' or W * T */ + + strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b871, & + t[t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - V' * W' */ + + if (*m > *k) { + +/* C1 := C1 - V1' * W' */ + + i__1 = *m - *k; + sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b1150, & + v[v_offset], ldv, &work[work_offset], ldwork, & + c_b871, &c__[c_offset], 
ldc) + ; + } + +/* W := W * V2 */ + + strmm_("Right", "Lower", "No transpose", "Unit", n, k, & + c_b871, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C2 := C2 - W' */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + work_dim1]; +/* L200: */ + } +/* L210: */ + } + + } else if (lsame_(side, "R")) { + +/* + Form C * H or C * H' where C = ( C1 C2 ) + + W := C * V' = (C1*V1' + C2*V2') (stored in WORK) + + W := C2 +*/ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + scopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ + j * work_dim1 + 1], &c__1); +/* L220: */ + } + +/* W := W * V2' */ + + strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b871, & + v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] + , ldwork); + if (*n > *k) { + +/* W := W + C1 * V1' */ + + i__1 = *n - *k; + sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b871, + &c__[c_offset], ldc, &v[v_offset], ldv, &c_b871, & + work[work_offset], ldwork); + } + +/* W := W * T or W * T' */ + + strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b871, &t[ + t_offset], ldt, &work[work_offset], ldwork); + +/* C := C - W * V */ + + if (*n > *k) { + +/* C1 := C1 - W * V1 */ + + i__1 = *n - *k; + sgemm_("No transpose", "No transpose", m, &i__1, k, & + c_b1150, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b871, &c__[c_offset], ldc); + } + +/* W := W * V2 */ + + strmm_("Right", "Lower", "No transpose", "Unit", m, k, & + c_b871, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + +/* C1 := C1 - W */ + + i__1 = *k; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; +/* L230: */ + } +/* L240: */ + } + + } + + } + } + + return 0; + +/* End of SLARFB */ + +} /* slarfb_ */ + +/* Subroutine */ int slarfg_(integer *n, real *alpha, real *x, integer *incx, + 
real *tau) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Builtin functions */ + double r_sign(real *, real *); + + /* Local variables */ + static integer j, knt; + static real beta; + extern doublereal snrm2_(integer *, real *, integer *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static real xnorm; + extern doublereal slapy2_(real *, real *), slamch_(char *); + static real safmin, rsafmn; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + SLARFG generates a real elementary reflector H of order n, such + that + + H * ( alpha ) = ( beta ), H' * H = I. + ( x ) ( 0 ) + + where alpha and beta are scalars, and x is an (n-1)-element real + vector. H is represented in the form + + H = I - tau * ( 1 ) * ( 1 v' ) , + ( v ) + + where tau is a real scalar and v is a real (n-1)-element + vector. + + If the elements of x are all zero, then tau = 0 and H is taken to be + the unit matrix. + + Otherwise 1 <= tau <= 2. + + Arguments + ========= + + N (input) INTEGER + The order of the elementary reflector. + + ALPHA (input/output) REAL + On entry, the value alpha. + On exit, it is overwritten with the value beta. + + X (input/output) REAL array, dimension + (1+(N-2)*abs(INCX)) + On entry, the vector x. + On exit, it is overwritten with the vector v. + + INCX (input) INTEGER + The increment between elements of X. INCX > 0. + + TAU (output) REAL + The value tau. 
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 1) { + *tau = 0.f; + return 0; + } + + i__1 = *n - 1; + xnorm = snrm2_(&i__1, &x[1], incx); + + if (xnorm == 0.f) { + +/* H = I */ + + *tau = 0.f; + } else { + +/* general case */ + + r__1 = slapy2_(alpha, &xnorm); + beta = -r_sign(&r__1, alpha); + safmin = slamch_("S") / slamch_("E"); + if (dabs(beta) < safmin) { + +/* XNORM, BETA may be inaccurate; scale X and recompute them */ + + rsafmn = 1.f / safmin; + knt = 0; +L10: + ++knt; + i__1 = *n - 1; + sscal_(&i__1, &rsafmn, &x[1], incx); + beta *= rsafmn; + *alpha *= rsafmn; + if (dabs(beta) < safmin) { + goto L10; + } + +/* New BETA is at most 1, at least SAFMIN */ + + i__1 = *n - 1; + xnorm = snrm2_(&i__1, &x[1], incx); + r__1 = slapy2_(alpha, &xnorm); + beta = -r_sign(&r__1, alpha); + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + r__1 = 1.f / (*alpha - beta); + sscal_(&i__1, &r__1, &x[1], incx); + +/* If ALPHA is subnormal, it may lose relative accuracy */ + + *alpha = beta; + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + *alpha *= safmin; +/* L20: */ + } + } else { + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + r__1 = 1.f / (*alpha - beta); + sscal_(&i__1, &r__1, &x[1], incx); + *alpha = beta; + } + } + + return 0; + +/* End of SLARFG */ + +} /* slarfg_ */ + +/* Subroutine */ int slarft_(char *direct, char *storev, integer *n, integer * + k, real *v, integer *ldv, real *tau, real *t, integer *ldt) +{ + /* System generated locals */ + integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3; + real r__1; + + /* Local variables */ + static integer i__, j; + static real vii; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *), strmv_(char *, char *, char *, integer *, real *, + integer *, real *, integer *); + + +/* + -- 
LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLARFT forms the triangular factor T of a real block reflector H + of order n, which is defined as a product of k elementary reflectors. + + If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; + + If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. + + If STOREV = 'C', the vector which defines the elementary reflector + H(i) is stored in the i-th column of the array V, and + + H = I - V * T * V' + + If STOREV = 'R', the vector which defines the elementary reflector + H(i) is stored in the i-th row of the array V, and + + H = I - V' * T * V + + Arguments + ========= + + DIRECT (input) CHARACTER*1 + Specifies the order in which the elementary reflectors are + multiplied to form the block reflector: + = 'F': H = H(1) H(2) . . . H(k) (Forward) + = 'B': H = H(k) . . . H(2) H(1) (Backward) + + STOREV (input) CHARACTER*1 + Specifies how the vectors which define the elementary + reflectors are stored (see also Further Details): + = 'C': columnwise + = 'R': rowwise + + N (input) INTEGER + The order of the block reflector H. N >= 0. + + K (input) INTEGER + The order of the triangular factor T (= the number of + elementary reflectors). K >= 1. + + V (input/output) REAL array, dimension + (LDV,K) if STOREV = 'C' + (LDV,N) if STOREV = 'R' + The matrix V. See further details. + + LDV (input) INTEGER + The leading dimension of the array V. + If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. + + TAU (input) REAL array, dimension (K) + TAU(i) must contain the scalar factor of the elementary + reflector H(i). + + T (output) REAL array, dimension (LDT,K) + The k by k triangular factor T of the block reflector. + If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is + lower triangular. The rest of the array is not used. 
+ + LDT (input) INTEGER + The leading dimension of the array T. LDT >= K. + + Further Details + =============== + + The shape of the matrix V and the storage of the vectors which define + the H(i) is best illustrated by the following example with n = 5 and + k = 3. The elements equal to 1 are not stored; the corresponding + array elements are modified but restored on exit. The rest of the + array is not used. + + DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': + + V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) + ( v1 1 ) ( 1 v2 v2 v2 ) + ( v1 v2 1 ) ( 1 v3 v3 ) + ( v1 v2 v3 ) + ( v1 v2 v3 ) + + DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': + + V = ( v1 v2 v3 ) V = ( v1 v1 1 ) + ( v1 v2 v3 ) ( v2 v2 v2 1 ) + ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) + ( 1 v3 ) + ( 1 ) + + ===================================================================== + + + Quick return if possible +*/ + + /* Parameter adjustments */ + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + --tau; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + + /* Function Body */ + if (*n == 0) { + return 0; + } + + if (lsame_(direct, "F")) { + i__1 = *k; + for (i__ = 1; i__ <= i__1; ++i__) { + if (tau[i__] == 0.f) { + +/* H(i) = I */ + + i__2 = i__; + for (j = 1; j <= i__2; ++j) { + t[j + i__ * t_dim1] = 0.f; +/* L10: */ + } + } else { + +/* general case */ + + vii = v[i__ + i__ * v_dim1]; + v[i__ + i__ * v_dim1] = 1.f; + if (lsame_(storev, "C")) { + +/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ + + i__2 = *n - i__ + 1; + i__3 = i__ - 1; + r__1 = -tau[i__]; + sgemv_("Transpose", &i__2, &i__3, &r__1, &v[i__ + v_dim1], + ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b1101, &t[ + i__ * t_dim1 + 1], &c__1); + } else { + +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ + + i__2 = i__ - 1; + i__3 = *n - i__ + 1; + r__1 = -tau[i__]; + sgemv_("No transpose", &i__2, &i__3, &r__1, &v[i__ * + v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & + c_b1101, &t[i__ * t_dim1 + 1], &c__1); + } + 
v[i__ + i__ * v_dim1] = vii; + +/* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */ + + i__2 = i__ - 1; + strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ + t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); + t[i__ + i__ * t_dim1] = tau[i__]; + } +/* L20: */ + } + } else { + for (i__ = *k; i__ >= 1; --i__) { + if (tau[i__] == 0.f) { + +/* H(i) = I */ + + i__1 = *k; + for (j = i__; j <= i__1; ++j) { + t[j + i__ * t_dim1] = 0.f; +/* L30: */ + } + } else { + +/* general case */ + + if (i__ < *k) { + if (lsame_(storev, "C")) { + vii = v[*n - *k + i__ + i__ * v_dim1]; + v[*n - *k + i__ + i__ * v_dim1] = 1.f; + +/* + T(i+1:k,i) := + - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) +*/ + + i__1 = *n - *k + i__; + i__2 = *k - i__; + r__1 = -tau[i__]; + sgemv_("Transpose", &i__1, &i__2, &r__1, &v[(i__ + 1) + * v_dim1 + 1], ldv, &v[i__ * v_dim1 + 1], & + c__1, &c_b1101, &t[i__ + 1 + i__ * t_dim1], & + c__1); + v[*n - *k + i__ + i__ * v_dim1] = vii; + } else { + vii = v[i__ + (*n - *k + i__) * v_dim1]; + v[i__ + (*n - *k + i__) * v_dim1] = 1.f; + +/* + T(i+1:k,i) := + - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' +*/ + + i__1 = *k - i__; + i__2 = *n - *k + i__; + r__1 = -tau[i__]; + sgemv_("No transpose", &i__1, &i__2, &r__1, &v[i__ + + 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & + c_b1101, &t[i__ + 1 + i__ * t_dim1], &c__1); + v[i__ + (*n - *k + i__) * v_dim1] = vii; + } + +/* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */ + + i__1 = *k - i__; + strmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * + t_dim1], &c__1) + ; + } + t[i__ + i__ * t_dim1] = tau[i__]; + } +/* L40: */ + } + } + return 0; + +/* End of SLARFT */ + +} /* slarft_ */ + +/* Subroutine */ int slarfx_(char *side, integer *m, integer *n, real *v, + real *tau, real *c__, integer *ldc, real *work) +{ + /* System generated locals */ + integer c_dim1, c_offset, i__1; + real r__1; + + /* Local variables */ + static integer j; + static real t1, t2, t3, t4, t5, t6, t7, t8, 
t9, v1, v2, v3, v4, v5, v6, + v7, v8, v9, t10, v10, sum; + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLARFX applies a real elementary reflector H to a real m by n + matrix C, from either the left or the right. H is represented in the + form + + H = I - tau * v * v' + + where tau is a real scalar and v is a real vector. + + If tau = 0, then H is taken to be the unit matrix + + This version uses inline code if H has order < 11. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': form H * C + = 'R': form C * H + + M (input) INTEGER + The number of rows of the matrix C. + + N (input) INTEGER + The number of columns of the matrix C. + + V (input) REAL array, dimension (M) if SIDE = 'L' + or (N) if SIDE = 'R' + The vector v in the representation of H. + + TAU (input) REAL + The value tau in the representation of H. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the m by n matrix C. + On exit, C is overwritten by the matrix H * C if SIDE = 'L', + or C * H if SIDE = 'R'. + + LDC (input) INTEGER + The leading dimension of the array C. LDA >= (1,M). + + WORK (workspace) REAL array, dimension + (N) if SIDE = 'L' + or (M) if SIDE = 'R' + WORK is not referenced if H has order < 11. 
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + --v; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + if (*tau == 0.f) { + return 0; + } + if (lsame_(side, "L")) { + +/* Form H * C, where H has order m. */ + + switch (*m) { + case 1: goto L10; + case 2: goto L30; + case 3: goto L50; + case 4: goto L70; + case 5: goto L90; + case 6: goto L110; + case 7: goto L130; + case 8: goto L150; + case 9: goto L170; + case 10: goto L190; + } + +/* + Code for general M + + w := C'*v +*/ + + sgemv_("Transpose", m, n, &c_b871, &c__[c_offset], ldc, &v[1], &c__1, + &c_b1101, &work[1], &c__1); + +/* C := C - tau * v * w' */ + + r__1 = -(*tau); + sger_(m, n, &r__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc) + ; + goto L410; +L10: + +/* Special code for 1 x 1 Householder */ + + t1 = 1.f - *tau * v[1] * v[1]; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1]; +/* L20: */ + } + goto L410; +L30: + +/* Special code for 2 x 2 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; +/* L40: */ + } + goto L410; +L50: + +/* Special code for 3 x 3 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; +/* L60: */ + } + goto L410; +L70: + +/* Special code for 4 x 4 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + i__1 = *n; + for (j = 1; j <= i__1; ++j) 
{ + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; +/* L80: */ + } + goto L410; +L90: + +/* Special code for 5 x 5 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; +/* L100: */ + } + goto L410; +L110: + +/* Special code for 6 x 6 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; +/* L120: */ + } + goto L410; +L130: + +/* Special code for 7 x 7 Householder */ + + v1 = v[1]; + t1 = *tau * v1; + v2 = v[2]; + t2 = *tau * v2; + v3 = v[3]; + t3 = *tau * v3; + v4 = v[4]; + t4 = *tau * v4; + v5 = v[5]; + t5 = *tau * v5; + v6 = v[6]; + t6 = *tau * v6; + v7 = v[7]; + t7 = *tau * v7; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 * + c__[j * c_dim1 + 3] + v4 
* c__[j * c_dim1 + 4] + v5 * c__[ + j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j * + c_dim1 + 7]; + c__[j * c_dim1 + 1] -= sum * t1; + c__[j * c_dim1 + 2] -= sum * t2; + c__[j * c_dim1 + 3] -= sum * t3; + c__[j * c_dim1 + 4] -= sum * t4; + c__[j * c_dim1 + 5] -= sum * t5; + c__[j * c_dim1 + 6] -= sum * t6; + c__[j * c_dim1 + 7] -= sum * t7; +/* L140: */ + } + goto L410; +L150: + +/* Special code for 8 x 8 Householder */ + + v1 = v[1]; + t1 = *tau * v1; v2 = v[2]; t2 = *tau * v2; v3 = v[3]; @@ -18629,20 +78593,20 @@ L190: w := C * v */ - dgemv_("No transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], & - c__1, &c_b29, &work[1], &c__1); + sgemv_("No transpose", m, n, &c_b871, &c__[c_offset], ldc, &v[1], & + c__1, &c_b1101, &work[1], &c__1); /* C := C - tau * w * v' */ - d__1 = -(*tau); - dger_(m, n, &d__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc) + r__1 = -(*tau); + sger_(m, n, &r__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc) ; goto L410; L210: /* Special code for 1 x 1 Householder */ - t1 = 1. 
- *tau * v[1] * v[1]; + t1 = 1.f - *tau * v[1] * v[1]; i__1 = *m; for (j = 1; j <= i__1; ++j) { c__[j + c_dim1] = t1 * c__[j + c_dim1]; @@ -18925,12 +78889,11 @@ L390: L410: return 0; -/* End of DLARFX */ +/* End of SLARFX */ -} /* dlarfx_ */ +} /* slarfx_ */ -/* Subroutine */ int dlartg_(doublereal *f, doublereal *g, doublereal *cs, - doublereal *sn, doublereal *r__) +/* Subroutine */ int slartg_(real *f, real *g, real *cs, real *sn, real *r__) { /* Initialized data */ @@ -18938,18 +78901,18 @@ L410: /* System generated locals */ integer i__1; - doublereal d__1, d__2; + real r__1, r__2; /* Builtin functions */ - double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal); + double log(doublereal), pow_ri(real *, integer *), sqrt(doublereal); /* Local variables */ static integer i__; - static doublereal f1, g1, eps, scale; + static real f1, g1, eps, scale; static integer count; - static doublereal safmn2, safmx2; - - static doublereal safmin; + static real safmn2, safmx2; + extern doublereal slamch_(char *); + static real safmin; /* @@ -18962,17 +78925,17 @@ L410: Purpose ======= - DLARTG generate a plane rotation so that + SLARTG generate a plane rotation so that [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. [ -SN CS ] [ G ] [ 0 ] - This is a slower, more accurate version of the BLAS1 routine DROTG, + This is a slower, more accurate version of the BLAS1 routine SROTG, with the following other differences: F and G are unchanged on return. If G=0, then CS=1 and SN=0. If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any - floating point operations (saves work in DBDSQR when + floating point operations (saves work in SBDSQR when there are zeros on the diagonal). If F exceeds G in magnitude, CS will be positive. @@ -18980,19 +78943,19 @@ L410: Arguments ========= - F (input) DOUBLE PRECISION + F (input) REAL The first component of vector to be rotated. - G (input) DOUBLE PRECISION + G (input) REAL The second component of vector to be rotated. 
- CS (output) DOUBLE PRECISION + CS (output) REAL The cosine of the rotation. - SN (output) DOUBLE PRECISION + SN (output) REAL The sine of the rotation. - R (output) DOUBLE PRECISION + R (output) REAL The nonzero component of the rotated vector. ===================================================================== @@ -19001,28 +78964,28 @@ L410: if (first) { first = FALSE_; - safmin = SAFEMINIMUM; - eps = EPSILON; - d__1 = BASE; - i__1 = (integer) (log(safmin / eps) / log(BASE) / - 2.); - safmn2 = pow_di(&d__1, &i__1); - safmx2 = 1. / safmn2; - } - if (*g == 0.) { - *cs = 1.; - *sn = 0.; + safmin = slamch_("S"); + eps = slamch_("E"); + r__1 = slamch_("B"); + i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / + 2.f); + safmn2 = pow_ri(&r__1, &i__1); + safmx2 = 1.f / safmn2; + } + if (*g == 0.f) { + *cs = 1.f; + *sn = 0.f; *r__ = *f; - } else if (*f == 0.) { - *cs = 0.; - *sn = 1.; + } else if (*f == 0.f) { + *cs = 0.f; + *sn = 1.f; *r__ = *g; } else { f1 = *f; g1 = *g; /* Computing MAX */ - d__1 = abs(f1), d__2 = abs(g1); - scale = max(d__1,d__2); + r__1 = dabs(f1), r__2 = dabs(g1); + scale = dmax(r__1,r__2); if (scale >= safmx2) { count = 0; L10: @@ -19030,16 +78993,16 @@ L10: f1 *= safmn2; g1 *= safmn2; /* Computing MAX */ - d__1 = abs(f1), d__2 = abs(g1); - scale = max(d__1,d__2); + r__1 = dabs(f1), r__2 = dabs(g1); + scale = dmax(r__1,r__2); if (scale >= safmx2) { goto L10; } /* Computing 2nd power */ - d__1 = f1; + r__1 = f1; /* Computing 2nd power */ - d__2 = g1; - *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + r__2 = g1; + *r__ = sqrt(r__1 * r__1 + r__2 * r__2); *cs = f1 / *r__; *sn = g1 / *r__; i__1 = count; @@ -19054,16 +79017,16 @@ L30: f1 *= safmx2; g1 *= safmx2; /* Computing MAX */ - d__1 = abs(f1), d__2 = abs(g1); - scale = max(d__1,d__2); + r__1 = dabs(f1), r__2 = dabs(g1); + scale = dmax(r__1,r__2); if (scale <= safmn2) { goto L30; } /* Computing 2nd power */ - d__1 = f1; + r__1 = f1; /* Computing 2nd power */ - d__2 = g1; - *r__ = sqrt(d__1 * d__1 
+ d__2 * d__2); + r__2 = g1; + *r__ = sqrt(r__1 * r__1 + r__2 * r__2); *cs = f1 / *r__; *sn = g1 / *r__; i__1 = count; @@ -19073,14 +79036,14 @@ L30: } } else { /* Computing 2nd power */ - d__1 = f1; + r__1 = f1; /* Computing 2nd power */ - d__2 = g1; - *r__ = sqrt(d__1 * d__1 + d__2 * d__2); + r__2 = g1; + *r__ = sqrt(r__1 * r__1 + r__2 * r__2); *cs = f1 / *r__; *sn = g1 / *r__; } - if (abs(*f) > abs(*g) && *cs < 0.) { + if (dabs(*f) > dabs(*g) && *cs < 0.f) { *cs = -(*cs); *sn = -(*sn); *r__ = -(*r__); @@ -19088,21 +79051,21 @@ L30: } return 0; -/* End of DLARTG */ +/* End of SLARTG */ -} /* dlartg_ */ +} /* slartg_ */ -/* Subroutine */ int dlas2_(doublereal *f, doublereal *g, doublereal *h__, - doublereal *ssmin, doublereal *ssmax) +/* Subroutine */ int slas2_(real *f, real *g, real *h__, real *ssmin, real * + ssmax) { /* System generated locals */ - doublereal d__1, d__2; + real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static doublereal c__, fa, ga, ha, as, at, au, fhmn, fhmx; + static real c__, fa, ga, ha, as, at, au, fhmn, fhmx; /* @@ -19115,7 +79078,7 @@ L30: Purpose ======= - DLAS2 computes the singular values of the 2-by-2 matrix + SLAS2 computes the singular values of the 2-by-2 matrix [ F G ] [ 0 H ]. On return, SSMIN is the smaller singular value and SSMAX is the @@ -19124,19 +79087,19 @@ L30: Arguments ========= - F (input) DOUBLE PRECISION + F (input) REAL The (1,1) element of the 2-by-2 matrix. - G (input) DOUBLE PRECISION + G (input) REAL The (1,2) element of the 2-by-2 matrix. - H (input) DOUBLE PRECISION + H (input) REAL The (2,2) element of the 2-by-2 matrix. - SSMIN (output) DOUBLE PRECISION + SSMIN (output) REAL The smaller singular value. - SSMAX (output) DOUBLE PRECISION + SSMAX (output) REAL The larger singular value. Further Details @@ -19162,33 +79125,33 @@ L30: */ - fa = abs(*f); - ga = abs(*g); - ha = abs(*h__); - fhmn = min(fa,ha); - fhmx = max(fa,ha); - if (fhmn == 0.) 
{ - *ssmin = 0.; - if (fhmx == 0.) { + fa = dabs(*f); + ga = dabs(*g); + ha = dabs(*h__); + fhmn = dmin(fa,ha); + fhmx = dmax(fa,ha); + if (fhmn == 0.f) { + *ssmin = 0.f; + if (fhmx == 0.f) { *ssmax = ga; } else { /* Computing 2nd power */ - d__1 = min(fhmx,ga) / max(fhmx,ga); - *ssmax = max(fhmx,ga) * sqrt(d__1 * d__1 + 1.); + r__1 = dmin(fhmx,ga) / dmax(fhmx,ga); + *ssmax = dmax(fhmx,ga) * sqrt(r__1 * r__1 + 1.f); } } else { if (ga < fhmx) { - as = fhmn / fhmx + 1.; + as = fhmn / fhmx + 1.f; at = (fhmx - fhmn) / fhmx; /* Computing 2nd power */ - d__1 = ga / fhmx; - au = d__1 * d__1; - c__ = 2. / (sqrt(as * as + au) + sqrt(at * at + au)); + r__1 = ga / fhmx; + au = r__1 * r__1; + c__ = 2.f / (sqrt(as * as + au) + sqrt(at * at + au)); *ssmin = fhmn * c__; *ssmax = fhmx / c__; } else { au = fhmx / ga; - if (au == 0.) { + if (au == 0.f) { /* Avoid possible harmful underflow if exponent range @@ -19199,13 +79162,14 @@ L30: *ssmin = fhmn * fhmx / ga; *ssmax = ga; } else { - as = fhmn / fhmx + 1.; + as = fhmn / fhmx + 1.f; at = (fhmx - fhmn) / fhmx; /* Computing 2nd power */ - d__1 = as * au; + r__1 = as * au; /* Computing 2nd power */ - d__2 = at * au; - c__ = 1. / (sqrt(d__1 * d__1 + 1.) 
+ sqrt(d__2 * d__2 + 1.)); + r__2 = at * au; + c__ = 1.f / (sqrt(r__1 * r__1 + 1.f) + sqrt(r__2 * r__2 + 1.f) + ); *ssmin = fhmn * c__ * au; *ssmin += *ssmin; *ssmax = ga / (c__ + c__); @@ -19214,29 +79178,29 @@ L30: } return 0; -/* End of DLAS2 */ +/* End of SLAS2 */ -} /* dlas2_ */ +} /* slas2_ */ -/* Subroutine */ int dlascl_(char *type__, integer *kl, integer *ku, - doublereal *cfrom, doublereal *cto, integer *m, integer *n, - doublereal *a, integer *lda, integer *info) +/* Subroutine */ int slascl_(char *type__, integer *kl, integer *ku, real * + cfrom, real *cto, integer *m, integer *n, real *a, integer *lda, + integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ static integer i__, j, k1, k2, k3, k4; - static doublereal mul, cto1; + static real mul, cto1; static logical done; - static doublereal ctoc; + static real ctoc; extern logical lsame_(char *, char *); static integer itype; - static doublereal cfrom1; - - static doublereal cfromc; + static real cfrom1; + extern doublereal slamch_(char *); + static real cfromc; extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal bignum, smlnum; + static real bignum, smlnum; /* @@ -19249,7 +79213,7 @@ L30: Purpose ======= - DLASCL multiplies the M by N real matrix A by the real scalar + SLASCL multiplies the M by N real matrix A by the real scalar CTO/CFROM. This is done without over/underflow as long as the final result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that A may be full, upper triangular, lower triangular, upper Hessenberg, @@ -19281,8 +79245,8 @@ L30: The upper bandwidth of A. Referenced only if TYPE = 'B', 'Q' or 'Z'. - CFROM (input) DOUBLE PRECISION - CTO (input) DOUBLE PRECISION + CFROM (input) REAL + CTO (input) REAL The matrix A is multiplied by CTO/CFROM. A(I,J) is computed without over/underflow if the final result CTO*A(I,J)/CFROM can be represented without over/underflow. 
CFROM must be @@ -19294,7 +79258,7 @@ L30: N (input) INTEGER The number of columns of the matrix A. N >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,M) + A (input/output) REAL array, dimension (LDA,M) The matrix to be multiplied by CTO/CFROM. See TYPE for the storage type. @@ -19339,7 +79303,7 @@ L30: if (itype == -1) { *info = -1; - } else if (*cfrom == 0.) { + } else if (*cfrom == 0.f) { *info = -4; } else if (*m < 0) { *info = -6; @@ -19367,7 +79331,7 @@ L30: if (*info != 0) { i__1 = -(*info); - xerbla_("DLASCL", &i__1); + xerbla_("SLASCL", &i__1); return 0; } @@ -19379,8 +79343,8 @@ L30: /* Get machine parameters */ - smlnum = SAFEMINIMUM; - bignum = 1. / smlnum; + smlnum = slamch_("S"); + bignum = 1.f / smlnum; cfromc = *cfrom; ctoc = *cto; @@ -19388,11 +79352,11 @@ L30: L10: cfrom1 = cfromc * smlnum; cto1 = ctoc / bignum; - if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { + if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { mul = smlnum; done = FALSE_; cfromc = cfrom1; - } else if (abs(cto1) > abs(cfromc)) { + } else if (dabs(cto1) > dabs(cfromc)) { mul = bignum; done = FALSE_; ctoc = cto1; @@ -19525,14 +79489,13 @@ L10: return 0; -/* End of DLASCL */ +/* End of SLASCL */ -} /* dlascl_ */ +} /* slascl_ */ -/* Subroutine */ int dlasd0_(integer *n, integer *sqre, doublereal *d__, - doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer * - ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer * - info) +/* Subroutine */ int slasd0_(integer *n, integer *sqre, real *d__, real *e, + real *u, integer *ldu, real *vt, integer *ldvt, integer *smlsiz, + integer *iwork, real *work, integer *info) { /* System generated locals */ integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2; @@ -19543,19 +79506,17 @@ L10: /* Local variables */ static integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, iwk, lvl, ndb1, nlp1, nrp1; - static doublereal beta; + static real beta; static integer idxq, nlvl; - static doublereal alpha; + 
static real alpha; static integer inode, ndiml, idxqc, ndimr, itemp, sqrei; - extern /* Subroutine */ int dlasd1_(integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - doublereal *, integer *, integer *, integer *, doublereal *, - integer *), dlasdq_(char *, integer *, integer *, integer *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *), dlasdt_(integer *, integer *, - integer *, integer *, integer *, integer *, integer *), xerbla_( - char *, integer *); + extern /* Subroutine */ int slasd1_(integer *, integer *, integer *, real + *, real *, real *, real *, integer *, real *, integer *, integer * + , integer *, real *, integer *), xerbla_(char *, integer *), slasdq_(char *, integer *, integer *, integer *, integer + *, integer *, real *, real *, real *, integer *, real *, integer * + , real *, integer *, real *, integer *), slasdt_(integer * + , integer *, integer *, integer *, integer *, integer *, integer * + ); /* @@ -19568,13 +79529,13 @@ L10: Purpose ======= - Using a divide and conquer approach, DLASD0 computes the singular + Using a divide and conquer approach, SLASD0 computes the singular value decomposition (SVD) of a real upper bidiagonal N-by-M matrix B with diagonal D and offdiagonal E, where M = N + SQRE. The algorithm computes orthogonal matrices U and VT such that B = U * S * VT. The singular values S are overwritten on D. - A related subroutine, DLASDA, computes only the singular values, + A related subroutine, SLASDA, computes only the singular values, and optionally, the singular vectors in compact form. 
Arguments @@ -19589,22 +79550,22 @@ L10: = 0: The bidiagonal matrix has column dimension M = N; = 1: The bidiagonal matrix has column dimension M = N+1; - D (input/output) DOUBLE PRECISION array, dimension (N) + D (input/output) REAL array, dimension (N) On entry D contains the main diagonal of the bidiagonal matrix. On exit D, if INFO = 0, contains its singular values. - E (input) DOUBLE PRECISION array, dimension (M-1) + E (input) REAL array, dimension (M-1) Contains the subdiagonal entries of the bidiagonal matrix. On exit, E has been destroyed. - U (output) DOUBLE PRECISION array, dimension at least (LDQ, N) + U (output) REAL array, dimension at least (LDQ, N) On exit, U contains the left singular vectors. LDU (input) INTEGER On entry, leading dimension of U. - VT (output) DOUBLE PRECISION array, dimension at least (LDVT, M) + VT (output) REAL array, dimension at least (LDVT, M) On exit, VT' contains the right singular vectors. LDVT (input) INTEGER @@ -19617,7 +79578,7 @@ L10: IWORK INTEGER work array. Dimension must be at least (8 * N) - WORK DOUBLE PRECISION work array. + WORK REAL work array. Dimension must be at least (3 * M**2 + 2 * M) INFO (output) INTEGER @@ -19670,14 +79631,14 @@ L10: } if (*info != 0) { i__1 = -(*info); - xerbla_("DLASD0", &i__1); + xerbla_("SLASD0", &i__1); return 0; } -/* If the input matrix is too small, call DLASDQ to find the SVD. */ +/* If the input matrix is too small, call SLASDQ to find the SVD. 
*/ if (*n <= *smlsiz) { - dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], + slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); return 0; } @@ -19689,12 +79650,12 @@ L10: ndimr = ndiml + *n; idxq = ndimr + *n; iwk = idxq + *n; - dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* For the nodes on bottom level of the tree, solve - their subproblems by DLASDQ. + their subproblems by SLASDQ. */ ndb1 = (nd + 1) / 2; @@ -19719,7 +79680,7 @@ L10: nlf = ic - nl; nrf = ic + 1; sqrei = 1; - dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ + slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[ nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[ nlf + nlf * u_dim1], ldu, &work[1], info); if (*info != 0) { @@ -19737,7 +79698,7 @@ L10: sqrei = 1; } nrp1 = nr + sqrei; - dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ + slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[ nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[ nrf + nrf * u_dim1], ldu, &work[1], info); if (*info != 0) { @@ -19784,7 +79745,7 @@ L10: idxqc = idxq + nlf - 1; alpha = d__[ic]; beta = e[ic]; - dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * + slasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf * u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[ idxqc], &iwork[iwk], &work[1], info); if (*info != 0) { @@ -19797,38 +79758,36 @@ L10: return 0; -/* End of DLASD0 */ +/* End of SLASD0 */ -} /* dlasd0_ */ +} /* slasd0_ */ -/* Subroutine */ int dlasd1_(integer *nl, integer *nr, integer *sqre, - doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u, - integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer * - iwork, doublereal *work, integer *info) +/* Subroutine */ int 
slasd1_(integer *nl, integer *nr, integer *sqre, real * + d__, real *alpha, real *beta, real *u, integer *ldu, real *vt, + integer *ldvt, integer *idxq, integer *iwork, real *work, integer * + info) { /* System generated locals */ integer u_dim1, u_offset, vt_dim1, vt_offset, i__1; - doublereal d__1, d__2; + real r__1, r__2; /* Local variables */ static integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, idxc, idxp, ldvt2; - extern /* Subroutine */ int dlasd2_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, integer *, - integer *, integer *, integer *, integer *, integer *), dlasd3_( - integer *, integer *, integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, doublereal *, integer *), - dlascl_(char *, integer *, integer *, doublereal *, doublereal *, - integer *, integer *, doublereal *, integer *, integer *), - dlamrg_(integer *, integer *, doublereal *, integer *, integer *, - integer *); + extern /* Subroutine */ int slasd2_(integer *, integer *, integer *, + integer *, real *, real *, real *, real *, real *, integer *, + real *, integer *, real *, real *, integer *, real *, integer *, + integer *, integer *, integer *, integer *, integer *, integer *), + slasd3_(integer *, integer *, integer *, integer *, real *, real + *, integer *, real *, real *, integer *, real *, integer *, real * + , integer *, real *, integer *, integer *, integer *, real *, + integer *); static integer isigma; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal orgnrm; + extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( + char *, integer *, integer *, real *, real *, integer *, integer * + , real *, integer *, 
integer *), slamrg_(integer *, + integer *, real *, integer *, integer *, integer *); + static real orgnrm; static integer coltyp; @@ -19842,13 +79801,13 @@ L10: Purpose ======= - DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, - where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. + SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, + where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0. - A related subroutine DLASD7 handles the case in which the singular + A related subroutine SLASD7 handles the case in which the singular values (and the singular vectors in factored form) are desired. - DLASD1 computes the SVD as follows: + SLASD1 computes the SVD as follows: ( D1(in) 0 0 0 ) B = U(in) * ( Z1' a Z2' b ) * VT(in) @@ -19868,12 +79827,12 @@ L10: when there are multiple singular values or when there are zeros in the Z vector. For each such occurence the dimension of the secular equation problem is reduced by one. This stage is - performed by the routine DLASD2. + performed by the routine SLASD2. The second stage consists of calculating the updated singular values. This is done by finding the square roots of the - roots of the secular equation via the routine DLASD4 (as called - by DLASD3). This routine also calculates the singular vectors of + roots of the secular equation via the routine SLASD4 (as called + by SLASD3). This routine also calculates the singular vectors of the current problem. The final stage consists of computing the updated singular vectors @@ -19897,21 +79856,21 @@ L10: The bidiagonal matrix has row dimension N = NL + NR + 1, and column dimension M = N + SQRE. - D (input/output) DOUBLE PRECISION array, + D (input/output) REAL array, dimension (N = NL+NR+1). On entry D(1:NL,1:NL) contains the singular values of the upper block; and D(NL+2:N) contains the singular values of the lower block. On exit D(1:N) contains the singular values of the modified matrix. 
- ALPHA (input) DOUBLE PRECISION + ALPHA (input) REAL Contains the diagonal element associated with the added row. - BETA (input) DOUBLE PRECISION + BETA (input) REAL Contains the off-diagonal element associated with the added row. - U (input/output) DOUBLE PRECISION array, dimension(LDU,N) + U (input/output) REAL array, dimension(LDU,N) On entry U(1:NL, 1:NL) contains the left singular vectors of the upper block; U(NL+2:N, NL+2:N) contains the left singular vectors of the lower block. On exit U contains the left @@ -19920,7 +79879,7 @@ L10: LDU (input) INTEGER The leading dimension of the array U. LDU >= max( 1, N ). - VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) + VT (input/output) REAL array, dimension(LDVT,M) where M = N + SQRE. On entry VT(1:NL+1, 1:NL+1)' contains the right singular vectors of the upper block; VT(NL+2:M, NL+2:M)' contains @@ -19938,7 +79897,7 @@ L10: IWORK (workspace) INTEGER array, dimension( 4 * N ) - WORK (workspace) DOUBLE PRECISION array, dimension( 3*M**2 + 2*M ) + WORK (workspace) REAL array, dimension( 3*M**2 + 2*M ) INFO (output) INTEGER = 0: successful exit. @@ -19982,7 +79941,7 @@ L10: } if (*info != 0) { i__1 = -(*info); - xerbla_("DLASD1", &i__1); + xerbla_("SLASD1", &i__1); return 0; } @@ -19992,7 +79951,7 @@ L10: /* The following values are for bookkeeping purposes only. They are integer pointers which indicate the portion of the workspace - used by a particular array in DLASD2 and DLASD3. + used by a particular array in SLASD2 and SLASD3. 
*/ ldu2 = n; @@ -20014,23 +79973,23 @@ L10: Computing MAX */ - d__1 = abs(*alpha), d__2 = abs(*beta); - orgnrm = max(d__1,d__2); - d__[*nl + 1] = 0.; + r__1 = dabs(*alpha), r__2 = dabs(*beta); + orgnrm = dmax(r__1,r__2); + d__[*nl + 1] = 0.f; i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { - orgnrm = (d__1 = d__[i__], abs(d__1)); + if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) { + orgnrm = (r__1 = d__[i__], dabs(r__1)); } /* L10: */ } - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &n, &c__1, &d__[1], &n, info); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &n, &c__1, &d__[1], &n, info); *alpha /= orgnrm; *beta /= orgnrm; /* Deflate singular values. */ - dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], + slasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset], ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, & work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], & idxq[1], &iwork[coltyp], info); @@ -20038,7 +79997,7 @@ L10: /* Solve Secular Equation and update singular vectors. */ ldq = k; - dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ + slasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[ u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[ ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info); if (*info != 0) { @@ -20047,54 +80006,52 @@ L10: /* Unscale. */ - dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &n, &c__1, &d__[1], &n, info); + slascl_("G", &c__0, &c__0, &c_b871, &orgnrm, &n, &c__1, &d__[1], &n, info); /* Prepare the IDXQ sorting permutation. 
*/ n1 = k; n2 = n - k; - dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); return 0; -/* End of DLASD1 */ +/* End of SLASD1 */ -} /* dlasd1_ */ +} /* slasd1_ */ -/* Subroutine */ int dlasd2_(integer *nl, integer *nr, integer *sqre, integer - *k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal * - beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt, - doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2, - integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer * - idxq, integer *coltyp, integer *info) +/* Subroutine */ int slasd2_(integer *nl, integer *nr, integer *sqre, integer + *k, real *d__, real *z__, real *alpha, real *beta, real *u, integer * + ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2, + real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc, + integer *idxq, integer *coltyp, integer *info) { /* System generated locals */ integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1; - doublereal d__1, d__2; + real r__1, r__2; /* Local variables */ - static doublereal c__; + static real c__; static integer i__, j, m, n; - static doublereal s; + static real s; static integer k2; - static doublereal z1; + static real z1; static integer ct, jp; - static doublereal eps, tau, tol; - static integer psm[4], nlp1, nlp2, idxi, idxj; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer ctot[4], idxjp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer jprev; - - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), dlacpy_(char *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *, - doublereal *, 
doublereal *, integer *), xerbla_(char *, + static real eps, tau, tol; + static integer psm[4], nlp1, nlp2, idxi, idxj, ctot[4]; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + static integer idxjp, jprev; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); - static doublereal hlftol; + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( + integer *, integer *, real *, integer *, integer *, integer *); + static real hlftol; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slaset_(char *, integer *, + integer *, real *, real *, real *, integer *); /* @@ -20107,14 +80064,14 @@ L10: Purpose ======= - DLASD2 merges the two sets of singular values together into a single + SLASD2 merges the two sets of singular values together into a single sorted set. Then it tries to deflate the size of the problem. There are two ways in which deflation can occur: when two or more singular values are close together or if there is a tiny entry in the Z vector. For each such occurrence the order of the related secular equation problem is reduced by one. - DLASD2 is called from DLASD1. + SLASD2 is called from SLASD1. Arguments ========= @@ -20136,20 +80093,20 @@ L10: Contains the dimension of the non-deflated matrix, This is the order of the related secular equation. 1 <= K <=N. - D (input/output) DOUBLE PRECISION array, dimension(N) + D (input/output) REAL array, dimension(N) On entry D contains the singular values of the two submatrices to be combined. On exit D contains the trailing (N-K) updated singular values (those which were deflated) sorted into increasing order. - ALPHA (input) DOUBLE PRECISION + ALPHA (input) REAL Contains the diagonal element associated with the added row. 
- BETA (input) DOUBLE PRECISION + BETA (input) REAL Contains the off-diagonal element associated with the added row. - U (input/output) DOUBLE PRECISION array, dimension(LDU,N) + U (input/output) REAL array, dimension(LDU,N) On entry U contains the left singular vectors of two submatrices in the two square blocks with corners at (1,1), (NL, NL), and (NL+2, NL+2), (N,N). @@ -20159,17 +80116,17 @@ L10: LDU (input) INTEGER The leading dimension of the array U. LDU >= N. - Z (output) DOUBLE PRECISION array, dimension(N) + Z (output) REAL array, dimension(N) On exit Z contains the updating row vector in the secular equation. - DSIGMA (output) DOUBLE PRECISION array, dimension (N) + DSIGMA (output) REAL array, dimension (N) Contains a copy of the diagonal elements (K-1 singular values and one zero) in the secular equation. - U2 (output) DOUBLE PRECISION array, dimension(LDU2,N) + U2 (output) REAL array, dimension(LDU2,N) Contains a copy of the first K-1 left singular vectors which - will be used by DLASD3 in a matrix multiply (DGEMM) to solve + will be used by SLASD3 in a matrix multiply (SGEMM) to solve for the new left singular vectors. U2 is arranged into four blocks. The first block contains a column with 1 at NL+1 and zero everywhere else; the second block contains non-zero @@ -20179,7 +80136,7 @@ L10: LDU2 (input) INTEGER The leading dimension of the array U2. LDU2 >= N. - VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) + VT (input/output) REAL array, dimension(LDVT,M) On entry VT' contains the right singular vectors of two submatrices in the two square blocks with corners at (1,1), (NL+1, NL+1), and (NL+2, NL+2), (M,M). @@ -20191,9 +80148,9 @@ L10: LDVT (input) INTEGER The leading dimension of the array VT. LDVT >= M. 
- VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N) + VT2 (output) REAL array, dimension(LDVT2,N) VT2' contains a copy of the first K right singular vectors - which will be used by DLASD3 in a matrix multiply (DGEMM) to + which will be used by SLASD3 in a matrix multiply (SGEMM) to solve for the new right singular vectors. VT2 is arranged into three blocks. The first block contains a row that corresponds to the special 0 diagonal element in SIGMA; the second block @@ -20303,7 +80260,7 @@ L10: } if (*info != 0) { i__1 = -(*info); - xerbla_("DLASD2", &i__1); + xerbla_("SLASD2", &i__1); return 0; } @@ -20366,7 +80323,7 @@ L10: /* L60: */ } - dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { @@ -20379,13 +80336,13 @@ L10: /* Calculate the allowable deflation tolerance */ - eps = EPSILON; + eps = slamch_("Epsilon"); /* Computing MAX */ - d__1 = abs(*alpha), d__2 = abs(*beta); - tol = max(d__1,d__2); + r__1 = dabs(*alpha), r__2 = dabs(*beta); + tol = dmax(r__1,r__2); /* Computing MAX */ - d__2 = (d__1 = d__[n], abs(d__1)); - tol = eps * 8. * max(d__2,tol); + r__2 = (r__1 = d__[n], dabs(r__1)); + tol = eps * 8.f * dmax(r__2,tol); /* There are 2 kinds of deflation -- first a value in the z-vector @@ -20412,7 +80369,7 @@ L10: k2 = n + 1; i__1 = n; for (j = 2; j <= i__1; ++j) { - if ((d__1 = z__[j], abs(d__1)) <= tol) { + if ((r__1 = z__[j], dabs(r__1)) <= tol) { /* Deflate due to small z component. */ @@ -20435,7 +80392,7 @@ L100: if (j > n) { goto L110; } - if ((d__1 = z__[j], abs(d__1)) <= tol) { + if ((r__1 = z__[j], dabs(r__1)) <= tol) { /* Deflate due to small z component. */ @@ -20446,7 +80403,7 @@ L100: /* Check if singular values are close enough to allow deflation. */ - if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) { /* Deflation is possible. 
*/ @@ -20458,11 +80415,11 @@ L100: destructive underflow. */ - tau = dlapy2_(&c__, &s); + tau = slapy2_(&c__, &s); c__ /= tau; s = -s / tau; z__[j] = tau; - z__[jprev] = 0.; + z__[jprev] = 0.f; /* Apply back the Givens rotation to the left and right @@ -20477,9 +80434,9 @@ L100: if (idxj <= nlp1) { --idxj; } - drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & + srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], & c__1, &c__, &s); - drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & + srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, & c__, &s); if (coltyp[j] != coltyp[jprev]) { coltyp[j] = 3; @@ -20566,30 +80523,30 @@ L120: if (idxj <= nlp1) { --idxj; } - dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); - dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); + scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1); + scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2); /* L160: */ } /* Determine DSIGMA(1), DSIGMA(2) and Z(1) */ - dsigma[1] = 0.; - hlftol = tol / 2.; - if (abs(dsigma[2]) <= hlftol) { + dsigma[1] = 0.f; + hlftol = tol / 2.f; + if (dabs(dsigma[2]) <= hlftol) { dsigma[2] = hlftol; } if (m > n) { - z__[1] = dlapy2_(&z1, &z__[m]); + z__[1] = slapy2_(&z1, &z__[m]); if (z__[1] <= tol) { - c__ = 1.; - s = 0.; + c__ = 1.f; + s = 0.f; z__[1] = tol; } else { c__ = z1 / z__[1]; s = z__[m] / z__[1]; } } else { - if (abs(z1) <= tol) { + if (dabs(z1) <= tol) { z__[1] = tol; } else { z__[1] = z1; @@ -20599,15 +80556,15 @@ L120: /* Move the rest of the updating row to Z. */ i__1 = *k - 1; - dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); + scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1); /* Determine the first column of U2, the first row of VT2 and the last row of VT. 
*/ - dlaset_("A", &n, &c__1, &c_b29, &c_b29, &u2[u2_offset], ldu2); - u2[nlp1 + u2_dim1] = 1.; + slaset_("A", &n, &c__1, &c_b1101, &c_b1101, &u2[u2_offset], ldu2); + u2[nlp1 + u2_dim1] = 1.f; if (m > n) { i__1 = nlp1; for (i__ = 1; i__ <= i__1; ++i__) { @@ -20622,10 +80579,10 @@ L120: /* L180: */ } } else { - dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); + scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2); } if (m > n) { - dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); + scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2); } /* @@ -20635,16 +80592,16 @@ L120: if (n > *k) { i__1 = n - *k; - dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); i__1 = n - *k; - dlacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) + slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1) * u_dim1 + 1], ldu); i__1 = n - *k; - dlacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + + slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 + vt_dim1], ldvt); } -/* Copy CTOT into COLTYP for referencing in DLASD3. */ +/* Copy CTOT into COLTYP for referencing in SLASD3. 
*/ for (j = 1; j <= 4; ++j) { coltyp[j] = ctot[j - 1]; @@ -20653,45 +80610,43 @@ L120: return 0; -/* End of DLASD2 */ +/* End of SLASD2 */ -} /* dlasd2_ */ +} /* slasd2_ */ -/* Subroutine */ int dlasd3_(integer *nl, integer *nr, integer *sqre, integer - *k, doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma, - doublereal *u, integer *ldu, doublereal *u2, integer *ldu2, - doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2, - integer *idxc, integer *ctot, doublereal *z__, integer *info) +/* Subroutine */ int slasd3_(integer *nl, integer *nr, integer *sqre, integer + *k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer * + ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2, + integer *ldvt2, integer *idxc, integer *ctot, real *z__, integer * + info) { /* System generated locals */ integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1, i__2; - doublereal d__1, d__2; + real r__1, r__2; /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); + double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ static integer i__, j, m, n, jc; - static doublereal rho; + static real rho; static integer nlp1, nlp2, nrp1; - static doublereal temp; - extern doublereal dnrm2_(integer *, doublereal *, integer *); - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); + static real temp; + extern doublereal snrm2_(integer *, real *, integer *); static integer ctemp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); static integer ktemp; - extern doublereal dlamc3_(doublereal *, 
doublereal *); - extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *), dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlacpy_(char *, integer *, integer - *, doublereal *, integer *, doublereal *, integer *), - xerbla_(char *, integer *); + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, + real *, real *, real *, real *, integer *), xerbla_(char *, + integer *), slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, + real *, integer *); /* @@ -20704,9 +80659,9 @@ L120: Purpose ======= - DLASD3 finds all the square roots of the roots of the secular + SLASD3 finds all the square roots of the roots of the secular equation, as defined by the values in D and Z. It makes the - appropriate calls to DLASD4 and then updates the singular + appropriate calls to SLASD4 and then updates the singular vectors by matrix multiplication. This code makes very mild assumptions about floating point @@ -20716,7 +80671,7 @@ L120: It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. - DLASD3 is called from DLASD1. + SLASD3 is called from SLASD1. Arguments ========= @@ -20737,43 +80692,43 @@ L120: K (input) INTEGER The size of the secular equation, 1 =< K = < N. - D (output) DOUBLE PRECISION array, dimension(K) + D (output) REAL array, dimension(K) On exit the square roots of the roots of the secular equation, in ascending order. - Q (workspace) DOUBLE PRECISION array, + Q (workspace) REAL array, dimension at least (LDQ,K). LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= K. 
- DSIGMA (input) DOUBLE PRECISION array, dimension(K) + DSIGMA (input) REAL array, dimension(K) The first K elements of this array contain the old roots of the deflated updating problem. These are the poles of the secular equation. - U (input) DOUBLE PRECISION array, dimension (LDU, N) + U (input) REAL array, dimension (LDU, N) The last N - K columns of this matrix contain the deflated left singular vectors. LDU (input) INTEGER The leading dimension of the array U. LDU >= N. - U2 (input) DOUBLE PRECISION array, dimension (LDU2, N) + U2 (input) REAL array, dimension (LDU2, N) The first K columns of this matrix contain the non-deflated left singular vectors for the split problem. LDU2 (input) INTEGER The leading dimension of the array U2. LDU2 >= N. - VT (input) DOUBLE PRECISION array, dimension (LDVT, M) + VT (input) REAL array, dimension (LDVT, M) The last M - K columns of VT' contain the deflated right singular vectors. LDVT (input) INTEGER The leading dimension of the array VT. LDVT >= N. - VT2 (input) DOUBLE PRECISION array, dimension (LDVT2, N) + VT2 (input) REAL array, dimension (LDVT2, N) The first K columns of VT2' contain the non-deflated right singular vectors for the split problem. @@ -20788,7 +80743,7 @@ L120: and the third is dense. The first column of U and the row of VT are treated separately, however. - The rows of the singular vectors found by DLASD4 + The rows of the singular vectors found by SLASD4 must be likewise permuted before the matrix multiplies can take place. @@ -20797,7 +80752,7 @@ L120: in U (or rows in VT), as described in IDXC. The fourth column type is any column which has been deflated. - Z (input) DOUBLE PRECISION array, dimension (K) + Z (input) REAL array, dimension (K) The first K elements of this array contain the components of the deflation-adjusted updating row vector. 
@@ -20872,17 +80827,17 @@ L120: } if (*info != 0) { i__1 = -(*info); - xerbla_("DLASD3", &i__1); + xerbla_("SLASD3", &i__1); return 0; } /* Quick return if possible */ if (*k == 1) { - d__[1] = abs(z__[1]); - dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); - if (z__[1] > 0.) { - dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); + d__[1] = dabs(z__[1]); + scopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); + if (z__[1] > 0.f) { + scopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); } else { i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { @@ -20914,25 +80869,25 @@ L120: i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { - dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; + dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; /* L20: */ } /* Keep a copy of Z. */ - dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); + scopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); /* Normalize Z. */ - rho = dnrm2_(k, &z__[1], &c__1); - dlascl_("G", &c__0, &c__0, &rho, &c_b15, k, &c__1, &z__[1], k, info); + rho = snrm2_(k, &z__[1], &c__1); + slascl_("G", &c__0, &c__0, &rho, &c_b871, k, &c__1, &z__[1], k, info); rho *= rho; /* Find the new singular values. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], + slasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], &vt[j * vt_dim1 + 1], info); /* If the zero finder fails, the computation is terminated. 
*/ @@ -20960,8 +80915,8 @@ L120: i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); /* L50: */ } - d__2 = sqrt((d__1 = z__[i__], abs(d__1))); - z__[i__] = d_sign(&d__2, &q[i__ + q_dim1]); + r__2 = sqrt((r__1 = z__[i__], dabs(r__1))); + z__[i__] = r_sign(&r__2, &q[i__ + q_dim1]); /* L60: */ } @@ -20974,7 +80929,7 @@ L120: for (i__ = 1; i__ <= i__1; ++i__) { vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * vt_dim1 + 1]; - u[i__ * u_dim1 + 1] = -1.; + u[i__ * u_dim1 + 1] = -1.f; i__2 = *k; for (j = 2; j <= i__2; ++j) { vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ @@ -20982,7 +80937,7 @@ L120: u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; /* L70: */ } - temp = dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1); + temp = snrm2_(k, &u[i__ * u_dim1 + 1], &c__1); q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; i__2 = *k; for (j = 2; j <= i__2; ++j) { @@ -20996,38 +80951,38 @@ L120: /* Update the left singular vector matrix. */ if (*k == 2) { - dgemm_("N", "N", &n, k, k, &c_b15, &u2[u2_offset], ldu2, &q[q_offset], - ldq, &c_b29, &u[u_offset], ldu); + sgemm_("N", "N", &n, k, k, &c_b871, &u2[u2_offset], ldu2, &q[q_offset] + , ldq, &c_b1101, &u[u_offset], ldu); goto L100; } if (ctot[1] > 0) { - dgemm_("N", "N", nl, k, &ctot[1], &c_b15, &u2[(u2_dim1 << 1) + 1], - ldu2, &q[q_dim1 + 2], ldq, &c_b29, &u[u_dim1 + 1], ldu); + sgemm_("N", "N", nl, k, &ctot[1], &c_b871, &u2[(u2_dim1 << 1) + 1], + ldu2, &q[q_dim1 + 2], ldq, &c_b1101, &u[u_dim1 + 1], ldu); if (ctot[3] > 0) { ktemp = ctot[1] + 2 + ctot[2]; - dgemm_("N", "N", nl, k, &ctot[3], &c_b15, &u2[ktemp * u2_dim1 + 1] - , ldu2, &q[ktemp + q_dim1], ldq, &c_b15, &u[u_dim1 + 1], - ldu); + sgemm_("N", "N", nl, k, &ctot[3], &c_b871, &u2[ktemp * u2_dim1 + + 1], ldu2, &q[ktemp + q_dim1], ldq, &c_b871, &u[u_dim1 + 1] + , ldu); } } else if (ctot[3] > 0) { ktemp = ctot[1] + 2 + ctot[2]; - dgemm_("N", "N", nl, k, &ctot[3], &c_b15, &u2[ktemp * u2_dim1 + 1], - ldu2, &q[ktemp + q_dim1], ldq, &c_b29, 
&u[u_dim1 + 1], ldu); + sgemm_("N", "N", nl, k, &ctot[3], &c_b871, &u2[ktemp * u2_dim1 + 1], + ldu2, &q[ktemp + q_dim1], ldq, &c_b1101, &u[u_dim1 + 1], ldu); } else { - dlacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); + slacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); } - dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); + scopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); ktemp = ctot[1] + 2; ctemp = ctot[2] + ctot[3]; - dgemm_("N", "N", nr, k, &ctemp, &c_b15, &u2[nlp2 + ktemp * u2_dim1], ldu2, - &q[ktemp + q_dim1], ldq, &c_b29, &u[nlp2 + u_dim1], ldu); + sgemm_("N", "N", nr, k, &ctemp, &c_b871, &u2[nlp2 + ktemp * u2_dim1], + ldu2, &q[ktemp + q_dim1], ldq, &c_b1101, &u[nlp2 + u_dim1], ldu); /* Generate the right singular vectors. */ L100: i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { - temp = dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); + temp = snrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; i__2 = *k; for (j = 2; j <= i__2; ++j) { @@ -21041,18 +80996,18 @@ L100: /* Update the right singular vector matrix. 
*/ if (*k == 2) { - dgemm_("N", "N", k, &m, k, &c_b15, &q[q_offset], ldq, &vt2[vt2_offset] - , ldvt2, &c_b29, &vt[vt_offset], ldvt); + sgemm_("N", "N", k, &m, k, &c_b871, &q[q_offset], ldq, &vt2[ + vt2_offset], ldvt2, &c_b1101, &vt[vt_offset], ldvt); return 0; } ktemp = ctot[1] + 1; - dgemm_("N", "N", k, &nlp1, &ktemp, &c_b15, &q[q_dim1 + 1], ldq, &vt2[ - vt2_dim1 + 1], ldvt2, &c_b29, &vt[vt_dim1 + 1], ldvt); + sgemm_("N", "N", k, &nlp1, &ktemp, &c_b871, &q[q_dim1 + 1], ldq, &vt2[ + vt2_dim1 + 1], ldvt2, &c_b1101, &vt[vt_dim1 + 1], ldvt); ktemp = ctot[1] + 2 + ctot[2]; if (ktemp <= *ldvt2) { - dgemm_("N", "N", k, &nlp1, &ctot[3], &c_b15, &q[ktemp * q_dim1 + 1], - ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b15, &vt[vt_dim1 + 1], - ldvt); + sgemm_("N", "N", k, &nlp1, &ctot[3], &c_b871, &q[ktemp * q_dim1 + 1], + ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b871, &vt[vt_dim1 + 1], + ldvt); } ktemp = ctot[1] + 1; @@ -21070,53 +81025,52 @@ L100: } } ctemp = ctot[2] + 1 + ctot[3]; - dgemm_("N", "N", k, &nrp1, &ctemp, &c_b15, &q[ktemp * q_dim1 + 1], ldq, & - vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b29, &vt[nlp2 * vt_dim1 + - 1], ldvt); + sgemm_("N", "N", k, &nrp1, &ctemp, &c_b871, &q[ktemp * q_dim1 + 1], ldq, & + vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b1101, &vt[nlp2 * vt_dim1 + + 1], ldvt); return 0; -/* End of DLASD3 */ +/* End of SLASD3 */ -} /* dlasd3_ */ +} /* slasd3_ */ -/* Subroutine */ int dlasd4_(integer *n, integer *i__, doublereal *d__, - doublereal *z__, doublereal *delta, doublereal *rho, doublereal * - sigma, doublereal *work, integer *info) +/* Subroutine */ int slasd4_(integer *n, integer *i__, real *d__, real *z__, + real *delta, real *rho, real *sigma, real *work, integer *info) { /* System generated locals */ integer i__1; - doublereal d__1; + real r__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static doublereal a, b, c__; + static real a, b, c__; static integer j; - static doublereal w, dd[3]; + static real w, dd[3]; static 
integer ii; - static doublereal dw, zz[3]; + static real dw, zz[3]; static integer ip1; - static doublereal eta, phi, eps, tau, psi; + static real eta, phi, eps, tau, psi; static integer iim1, iip1; - static doublereal dphi, dpsi; + static real dphi, dpsi; static integer iter; - static doublereal temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, - dtiip; + static real temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, dtiip; static integer niter; - static doublereal dtisq; + static real dtisq; static logical swtch; - static doublereal dtnsq; - extern /* Subroutine */ int dlaed6_(integer *, logical *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *) - , dlasd5_(integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *); - static doublereal delsq2, dtnsq1; + static real dtnsq; + extern /* Subroutine */ int slaed6_(integer *, logical *, real *, real *, + real *, real *, real *, integer *); + static real delsq2; + extern /* Subroutine */ int slasd5_(integer *, real *, real *, real *, + real *, real *, real *); + static real dtnsq1; static logical swtch3; - + extern doublereal slamch_(char *); static logical orgati; - static doublereal erretm, dtipsq, rhoinv; + static real erretm, dtipsq, rhoinv; /* @@ -21155,26 +81109,26 @@ L100: I (input) INTEGER The index of the eigenvalue to be computed. 1 <= I <= N. - D (input) DOUBLE PRECISION array, dimension ( N ) + D (input) REAL array, dimension ( N ) The original eigenvalues. It is assumed that they are in order, 0 <= D(I) < D(J) for I < J. - Z (input) DOUBLE PRECISION array, dimension ( N ) + Z (input) REAL array, dimension ( N ) The components of the updating vector. - DELTA (output) DOUBLE PRECISION array, dimension ( N ) + DELTA (output) REAL array, dimension ( N ) If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th component. If N = 1, then DELTA(1) = 1. 
The vector DELTA contains the information necessary to construct the (singular) eigenvectors. - RHO (input) DOUBLE PRECISION + RHO (input) REAL The scalar in the symmetric updating formula. - SIGMA (output) DOUBLE PRECISION + SIGMA (output) REAL The computed lambda_I, the I-th updated eigenvalue. - WORK (workspace) DOUBLE PRECISION array, dimension ( N ) + WORK (workspace) REAL array, dimension ( N ) If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th component. If N = 1, then WORK( 1 ) = 1. @@ -21226,19 +81180,19 @@ L100: /* Presumably, I=1 upon entry */ *sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]); - delta[1] = 1.; - work[1] = 1.; + delta[1] = 1.f; + work[1] = 1.f; return 0; } if (*n == 2) { - dlasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); + slasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]); return 0; } /* Compute machine epsilon */ - eps = EPSILON; - rhoinv = 1. / *rho; + eps = slamch_("Epsilon"); + rhoinv = 1.f / *rho; /* The case I = N */ @@ -21251,7 +81205,7 @@ L100: /* Calculate initial guess */ - temp = *rho / 2.; + temp = *rho / 2.f; /* If ||Z||_2 is not one, then TEMP should be set to @@ -21266,7 +81220,7 @@ L100: /* L10: */ } - psi = 0.; + psi = 0.f; i__1 = *n - 2; for (j = 1; j <= i__1; ++j) { psi += z__[j] * z__[j] / (delta[j] * work[j]); @@ -21277,7 +81231,7 @@ L100: w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[* n] / (delta[*n] * work[*n]); - if (w <= 0.) { + if (w <= 0.f) { temp1 = sqrt(d__[*n] * d__[*n] + *rho); temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[* n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * @@ -21295,10 +81249,10 @@ L100: a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[* n]; b = z__[*n] * z__[*n] * delsq; - if (a < 0.) { - tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + if (a < 0.f) { + tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); } else { - tau = (a + sqrt(a * a + b * 4. 
* c__)) / (c__ * 2.); + tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); } } @@ -21317,10 +81271,10 @@ L100: SIGMA_n^2 - D( N )*D( N ) */ - if (a < 0.) { - tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a); + if (a < 0.f) { + tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a); } else { - tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.); + tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f); } /* @@ -21344,9 +81298,9 @@ L100: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.; - psi = 0.; - erretm = 0.; + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (delta[j] * work[j]); @@ -21355,21 +81309,21 @@ L100: erretm += psi; /* L40: */ } - erretm = abs(erretm); + erretm = dabs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / (delta[*n] * work[*n]); phi = z__[*n] * temp; dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi - + dphi); + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( + dpsi + dphi); w = rhoinv + phi + psi; /* Test for convergence */ - if (abs(w) <= eps * erretm) { + if (dabs(w) <= eps * erretm) { goto L240; } @@ -21381,17 +81335,17 @@ L100: c__ = w - dtnsq1 * dpsi - dtnsq * dphi; a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi); b = dtnsq * dtnsq1 * w; - if (c__ < 0.) { - c__ = abs(c__); + if (c__ < 0.f) { + c__ = dabs(c__); } - if (c__ == 0.) { + if (c__ == 0.f) { eta = *rho - *sigma * *sigma; - } else if (a >= 0.) { - eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__ - * 2.); + } else if (a >= 0.f) { + eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / ( + c__ * 2.f); } else { - eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))) - ); + eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); } /* @@ -21402,7 +81356,7 @@ L100: will guarantee eta*w < 0. */ - if (w * eta > 0.) 
{ + if (w * eta > 0.f) { eta = -w / (dpsi + dphi); } temp = eta - dtnsq; @@ -21423,9 +81377,9 @@ L100: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.; - psi = 0.; - erretm = 0.; + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); @@ -21434,15 +81388,15 @@ L100: erretm += psi; /* L60: */ } - erretm = abs(erretm); + erretm = dabs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / (work[*n] * delta[*n]); phi = z__[*n] * temp; dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi - + dphi); + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * ( + dpsi + dphi); w = rhoinv + phi + psi; @@ -21454,7 +81408,7 @@ L100: /* Test for convergence */ - if (abs(w) <= eps * erretm) { + if (dabs(w) <= eps * erretm) { goto L240; } @@ -21465,12 +81419,12 @@ L100: c__ = w - dtnsq1 * dpsi - dtnsq * dphi; a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi); b = dtnsq1 * dtnsq * w; - if (a >= 0.) { - eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); + if (a >= 0.f) { + eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); } else { - eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); + eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); } /* @@ -21481,12 +81435,12 @@ L100: will guarantee eta*w < 0. */ - if (w * eta > 0.) { + if (w * eta > 0.f) { eta = -w / (dpsi + dphi); } temp = eta - dtnsq; - if (temp <= 0.) 
{ - eta /= 2.; + if (temp <= 0.f) { + eta /= 2.f; } tau += eta; @@ -21502,9 +81456,9 @@ L100: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.; - psi = 0.; - erretm = 0.; + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; i__1 = ii; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); @@ -21513,15 +81467,15 @@ L100: erretm += psi; /* L80: */ } - erretm = abs(erretm); + erretm = dabs(erretm); /* Evaluate PHI and the derivative DPHI */ temp = z__[*n] / (work[*n] * delta[*n]); phi = z__[*n] * temp; dphi = temp * temp; - erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * ( - dpsi + dphi); + erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * + (dpsi + dphi); w = rhoinv + phi + psi; /* L90: */ @@ -21544,7 +81498,7 @@ L100: /* Calculate initial guess */ delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]); - delsq2 = delsq / 2.; + delsq2 = delsq / 2.f; temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2)); i__1 = *n; for (j = 1; j <= i__1; ++j) { @@ -21553,14 +81507,14 @@ L100: /* L100: */ } - psi = 0.; + psi = 0.f; i__1 = *i__ - 1; for (j = 1; j <= i__1; ++j) { psi += z__[j] * z__[j] / (work[j] * delta[j]); /* L110: */ } - phi = 0.; + phi = 0.f; i__1 = *i__ + 2; for (j = *n; j >= i__1; --j) { phi += z__[j] * z__[j] / (work[j] * delta[j]); @@ -21570,7 +81524,7 @@ L100: w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[ ip1] * z__[ip1] / (work[ip1] * delta[ip1]); - if (w > 0.) { + if (w > 0.f) { /* d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 @@ -21579,16 +81533,16 @@ L100: */ orgati = TRUE_; - sg2lb = 0.; + sg2lb = 0.f; sg2ub = delsq2; a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1]; b = z__[*i__] * z__[*i__] * delsq; - if (a > 0.) { - tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); + if (a > 0.f) { + tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); } else { - tau = (a - sqrt((d__1 = a * a - b * 4. 
* c__, abs(d__1)))) / ( - c__ * 2.); + tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); } /* @@ -21608,15 +81562,15 @@ L100: orgati = FALSE_; sg2lb = -delsq2; - sg2ub = 0.; + sg2ub = 0.f; a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1]; b = z__[ip1] * z__[ip1] * delsq; - if (a < 0.) { - tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs( - d__1)))); + if (a < 0.f) { + tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs( + r__1)))); } else { - tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) / - (c__ * 2.); + tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1)))) + / (c__ * 2.f); } /* @@ -21625,8 +81579,8 @@ L100: SIGMA - D( IP1 ). */ - eta = tau / (d__[ip1] + sqrt((d__1 = d__[ip1] * d__[ip1] + tau, - abs(d__1)))); + eta = tau / (d__[ip1] + sqrt((r__1 = d__[ip1] * d__[ip1] + tau, + dabs(r__1)))); } if (orgati) { @@ -21653,9 +81607,9 @@ L100: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.; - psi = 0.; - erretm = 0.; + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); @@ -21664,12 +81618,12 @@ L100: erretm += psi; /* L150: */ } - erretm = abs(erretm); + erretm = dabs(erretm); /* Evaluate PHI and the derivative DPHI */ - dphi = 0.; - phi = 0.; + dphi = 0.f; + phi = 0.f; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / (work[j] * delta[j]); @@ -21688,11 +81642,11 @@ L100: swtch3 = FALSE_; if (orgati) { - if (w < 0.) { + if (w < 0.f) { swtch3 = TRUE_; } } else { - if (w > 0.) { + if (w > 0.f) { swtch3 = TRUE_; } } @@ -21704,19 +81658,19 @@ L100: dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w += temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + - abs(tau) * dw; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f + + dabs(tau) * dw; /* Test for convergence */ - if (abs(w) <= eps * erretm) { + if (dabs(w) <= eps * erretm) { goto L240; } - if (w <= 0.) 
{ - sg2lb = max(sg2lb,tau); + if (w <= 0.f) { + sg2lb = dmax(sg2lb,tau); } else { - sg2ub = min(sg2ub,tau); + sg2ub = dmin(sg2ub,tau); } /* Calculate the new step */ @@ -21727,17 +81681,17 @@ L100: dtisq = work[*i__] * delta[*i__]; if (orgati) { /* Computing 2nd power */ - d__1 = z__[*i__] / dtisq; - c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + r__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (r__1 * r__1); } else { /* Computing 2nd power */ - d__1 = z__[ip1] / dtipsq; - c__ = w - dtisq * dw - delsq * (d__1 * d__1); + r__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (r__1 * r__1); } a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; b = dtipsq * dtisq * w; - if (c__ == 0.) { - if (a == 0.) { + if (c__ == 0.f) { + if (a == 0.f) { if (orgati) { a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi + dphi); @@ -21747,12 +81701,12 @@ L100: } } eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / ( - c__ * 2.); + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / + (c__ * 2.f); } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs( - d__1)))); + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( + r__1)))); } } else { @@ -21788,7 +81742,7 @@ L100: dd[0] = dtiim; dd[1] = delta[ii] * work[ii]; dd[2] = dtiip; - dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + slaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); if (*info != 0) { goto L240; } @@ -21802,7 +81756,7 @@ L100: will guarantee eta*w < 0. */ - if (w * eta >= 0.) { + if (w * eta >= 0.f) { eta = -w / dw; } if (orgati) { @@ -21813,10 +81767,10 @@ L100: temp = eta - temp1; } if (temp > sg2ub || temp < sg2lb) { - if (w < 0.) 
{ - eta = (sg2ub - tau) / 2.; + if (w < 0.f) { + eta = (sg2ub - tau) / 2.f; } else { - eta = (sg2lb - tau) / 2.; + eta = (sg2lb - tau) / 2.f; } } @@ -21835,9 +81789,9 @@ L100: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.; - psi = 0.; - erretm = 0.; + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); @@ -21846,12 +81800,12 @@ L100: erretm += psi; /* L180: */ } - erretm = abs(erretm); + erretm = dabs(erretm); /* Evaluate PHI and the derivative DPHI */ - dphi = 0.; - phi = 0.; + dphi = 0.f; + phi = 0.f; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / (work[j] * delta[j]); @@ -21865,22 +81819,22 @@ L100: dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + - abs(tau) * dw; + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f + + dabs(tau) * dw; - if (w <= 0.) { - sg2lb = max(sg2lb,tau); + if (w <= 0.f) { + sg2lb = dmax(sg2lb,tau); } else { - sg2ub = min(sg2ub,tau); + sg2ub = dmin(sg2ub,tau); } swtch = FALSE_; if (orgati) { - if (-w > abs(prew) / 10.) { + if (-w > dabs(prew) / 10.f) { swtch = TRUE_; } } else { - if (w > abs(prew) / 10.) { + if (w > dabs(prew) / 10.f) { swtch = TRUE_; } } @@ -21893,7 +81847,7 @@ L100: /* Test for convergence */ - if (abs(w) <= eps * erretm) { + if (dabs(w) <= eps * erretm) { goto L240; } @@ -21905,12 +81859,12 @@ L100: if (! 
swtch) { if (orgati) { /* Computing 2nd power */ - d__1 = z__[*i__] / dtisq; - c__ = w - dtipsq * dw + delsq * (d__1 * d__1); + r__1 = z__[*i__] / dtisq; + c__ = w - dtipsq * dw + delsq * (r__1 * r__1); } else { /* Computing 2nd power */ - d__1 = z__[ip1] / dtipsq; - c__ = w - dtisq * dw - delsq * (d__1 * d__1); + r__1 = z__[ip1] / dtipsq; + c__ = w - dtisq * dw - delsq * (r__1 * r__1); } } else { temp = z__[ii] / (work[ii] * delta[ii]); @@ -21923,8 +81877,8 @@ L100: } a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw; b = dtipsq * dtisq * w; - if (c__ == 0.) { - if (a == 0.) { + if (c__ == 0.f) { + if (a == 0.f) { if (! swtch) { if (orgati) { a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * @@ -21938,12 +81892,12 @@ L100: } } eta = b / a; - } else if (a <= 0.) { - eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) - / (c__ * 2.); + } else if (a <= 0.f) { + eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)) + )) / (c__ * 2.f); } else { - eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, - abs(d__1)))); + eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, + dabs(r__1)))); } } else { @@ -21986,7 +81940,7 @@ L100: dd[0] = dtiim; dd[1] = delta[ii] * work[ii]; dd[2] = dtiip; - dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); + slaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info); if (*info != 0) { goto L240; } @@ -22000,7 +81954,7 @@ L100: will guarantee eta*w < 0. */ - if (w * eta >= 0.) { + if (w * eta >= 0.f) { eta = -w / dw; } if (orgati) { @@ -22011,10 +81965,10 @@ L100: temp = eta - temp1; } if (temp > sg2ub || temp < sg2lb) { - if (w < 0.) 
{ - eta = (sg2ub - tau) / 2.; + if (w < 0.f) { + eta = (sg2ub - tau) / 2.f; } else { - eta = (sg2lb - tau) / 2.; + eta = (sg2lb - tau) / 2.f; } } @@ -22033,9 +81987,9 @@ L100: /* Evaluate PSI and the derivative DPSI */ - dpsi = 0.; - psi = 0.; - erretm = 0.; + dpsi = 0.f; + psi = 0.f; + erretm = 0.f; i__1 = iim1; for (j = 1; j <= i__1; ++j) { temp = z__[j] / (work[j] * delta[j]); @@ -22044,12 +81998,12 @@ L100: erretm += psi; /* L210: */ } - erretm = abs(erretm); + erretm = dabs(erretm); /* Evaluate PHI and the derivative DPHI */ - dphi = 0.; - phi = 0.; + dphi = 0.f; + phi = 0.f; i__1 = iip1; for (j = *n; j >= i__1; --j) { temp = z__[j] / (work[j] * delta[j]); @@ -22063,16 +82017,16 @@ L100: dw = dpsi + dphi + temp * temp; temp = z__[ii] * temp; w = rhoinv + phi + psi + temp; - erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. - + abs(tau) * dw; - if (w * prew > 0. && abs(w) > abs(prew) / 10.) { + erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * + 3.f + dabs(tau) * dw; + if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) { swtch = ! swtch; } - if (w <= 0.) { - sg2lb = max(sg2lb,tau); + if (w <= 0.f) { + sg2lb = dmax(sg2lb,tau); } else { - sg2ub = min(sg2ub,tau); + sg2ub = dmin(sg2ub,tau); } /* L230: */ @@ -22087,22 +82041,21 @@ L100: L240: return 0; -/* End of DLASD4 */ +/* End of SLASD4 */ -} /* dlasd4_ */ +} /* slasd4_ */ -/* Subroutine */ int dlasd5_(integer *i__, doublereal *d__, doublereal *z__, - doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal * - work) +/* Subroutine */ int slasd5_(integer *i__, real *d__, real *z__, real *delta, + real *rho, real *dsigma, real *work) { /* System generated locals */ - doublereal d__1; + real r__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static doublereal b, c__, w, del, tau, delsq; + static real b, c__, w, del, tau, delsq; /* @@ -22134,24 +82087,24 @@ L240: I (input) INTEGER The index of the eigenvalue to be computed. I = 1 or I = 2. 
- D (input) DOUBLE PRECISION array, dimension ( 2 ) + D (input) REAL array, dimension ( 2 ) The original eigenvalues. We assume 0 <= D(1) < D(2). - Z (input) DOUBLE PRECISION array, dimension ( 2 ) + Z (input) REAL array, dimension ( 2 ) The components of the updating vector. - DELTA (output) DOUBLE PRECISION array, dimension ( 2 ) + DELTA (output) REAL array, dimension ( 2 ) Contains (D(j) - lambda_I) in its j-th component. The vector DELTA contains the information necessary to construct the eigenvectors. - RHO (input) DOUBLE PRECISION + RHO (input) REAL The scalar in the symmetric updating formula. - DSIGMA (output) DOUBLE PRECISION + DSIGMA (output) REAL The computed lambda_I, the I-th updated eigenvalue. - WORK (workspace) DOUBLE PRECISION array, dimension ( 2 ) + WORK (workspace) REAL array, dimension ( 2 ) WORK contains (D(j) + sigma_I) in its j-th component. Further Details @@ -22175,9 +82128,9 @@ L240: del = d__[2] - d__[1]; delsq = del * (d__[2] + d__[1]); if (*i__ == 1) { - w = *rho * 4. * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.) - z__[1] * - z__[1] / (d__[1] * 3. + d__[2])) / del + 1.; - if (w > 0.) { + w = *rho * 4.f * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.f) - z__[1] * + z__[1] / (d__[1] * 3.f + d__[2])) / del + 1.f; + if (w > 0.f) { b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]); c__ = *rho * z__[1] * z__[1] * delsq; @@ -22187,7 +82140,8 @@ L240: The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) */ - tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1)))); + tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1)) + )); /* The following TAU is DSIGMA - D( 1 ) */ @@ -22195,7 +82149,7 @@ L240: *dsigma = d__[1] + tau; delta[1] = -tau; delta[2] = del - tau; - work[1] = d__[1] * 2. + tau; + work[1] = d__[1] * 2.f + tau; work[2] = d__[1] + tau + d__[2]; /* DELTA( 1 ) = -Z( 1 ) / TAU @@ -22207,20 +82161,20 @@ L240: /* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ - if (b > 0.) { - tau = c__ * -2. 
/ (b + sqrt(b * b + c__ * 4.)); + if (b > 0.f) { + tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f)); } else { - tau = (b - sqrt(b * b + c__ * 4.)) / 2.; + tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f; } /* The following TAU is DSIGMA - D( 2 ) */ - tau /= d__[2] + sqrt((d__1 = d__[2] * d__[2] + tau, abs(d__1))); + tau /= d__[2] + sqrt((r__1 = d__[2] * d__[2] + tau, dabs(r__1))); *dsigma = d__[2] + tau; delta[1] = -(del + tau); delta[2] = -tau; work[1] = d__[1] + tau + d__[2]; - work[2] = d__[2] * 2. + tau; + work[2] = d__[2] * 2.f + tau; /* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) DELTA( 2 ) = -Z( 2 ) / TAU @@ -22240,10 +82194,10 @@ L240: /* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */ - if (b > 0.) { - tau = (b + sqrt(b * b + c__ * 4.)) / 2.; + if (b > 0.f) { + tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f; } else { - tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.)); + tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f)); } /* The following TAU is DSIGMA - D( 2 ) */ @@ -22253,7 +82207,7 @@ L240: delta[1] = -(del + tau); delta[2] = -tau; work[1] = d__[1] + tau + d__[2]; - work[2] = d__[2] * 2. 
+ tau; + work[2] = d__[2] * 2.f + tau; /* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) DELTA( 2 ) = -Z( 2 ) / TAU @@ -22264,41 +82218,38 @@ L240: } return 0; -/* End of DLASD5 */ +/* End of SLASD5 */ -} /* dlasd5_ */ +} /* slasd5_ */ -/* Subroutine */ int dlasd6_(integer *icompq, integer *nl, integer *nr, - integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl, - doublereal *alpha, doublereal *beta, integer *idxq, integer *perm, - integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, - integer *ldgnum, doublereal *poles, doublereal *difl, doublereal * - difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s, - doublereal *work, integer *iwork, integer *info) +/* Subroutine */ int slasd6_(integer *icompq, integer *nl, integer *nr, + integer *sqre, real *d__, real *vf, real *vl, real *alpha, real *beta, + integer *idxq, integer *perm, integer *givptr, integer *givcol, + integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * + difl, real *difr, real *z__, integer *k, real *c__, real *s, real * + work, integer *iwork, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, i__1; - doublereal d__1, d__2; + real r__1, r__2; /* Local variables */ static integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dlasd7_(integer *, integer *, integer *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, integer *, integer *, - integer *, integer *, integer *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), dlasd8_( - integer *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dlascl_(char *, integer 
*, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlamrg_(integer *, integer *, - doublereal *, integer *, integer *, integer *); + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slasd7_(integer *, integer *, integer *, integer *, + integer *, real *, real *, real *, real *, real *, real *, real *, + real *, real *, real *, integer *, integer *, integer *, integer + *, integer *, integer *, integer *, real *, integer *, real *, + real *, integer *), slasd8_(integer *, integer *, real *, real *, + real *, real *, real *, real *, integer *, real *, real *, + integer *); static integer isigma; - extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal orgnrm; + extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( + char *, integer *, integer *, real *, real *, integer *, integer * + , real *, integer *, integer *), slamrg_(integer *, + integer *, real *, integer *, integer *, integer *); + static real orgnrm; /* @@ -22311,15 +82262,15 @@ L240: Purpose ======= - DLASD6 computes the SVD of an updated upper bidiagonal matrix B + SLASD6 computes the SVD of an updated upper bidiagonal matrix B obtained by merging two smaller ones by appending a row. This routine is used only for the problem which requires all singular values and optionally singular vector matrices in factored form. B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. - A related subroutine, DLASD1, handles the case in which all singular + A related subroutine, SLASD1, handles the case in which all singular values and singular vectors of the bidiagonal matrix are desired. - DLASD6 computes the SVD as follows: + SLASD6 computes the SVD as follows: ( D1(in) 0 0 0 ) B = U(in) * ( Z1' a Z2' b ) * VT(in) @@ -22335,7 +82286,7 @@ L240: components of all the right singular vectors of the lower block, and the last components of all the right singular vectors of the upper block. 
These components are stored and updated in VF and VL, - respectively, in DLASD6. Hence U and VT are not explicitly + respectively, in SLASD6. Hence U and VT are not explicitly referenced. The singular values are stored in D. The algorithm consists of two @@ -22345,16 +82296,16 @@ L240: when there are multiple singular values or if there is a zero in the Z vector. For each such occurence the dimension of the secular equation problem is reduced by one. This stage is - performed by the routine DLASD7. + performed by the routine SLASD7. The second stage consists of calculating the updated singular values. This is done by finding the roots of the - secular equation via the routine DLASD4 (as called by DLASD8). + secular equation via the routine SLASD4 (as called by SLASD8). This routine also updates VF and VL and computes the distances between the updated singular values and the old singular values. - DLASD6 is called from DLASDA. + SLASD6 is called from SLASDA. Arguments ========= @@ -22378,30 +82329,30 @@ L240: The bidiagonal matrix has row dimension N = NL + NR + 1, and column dimension M = N + SQRE. - D (input/output) DOUBLE PRECISION array, dimension ( NL+NR+1 ). + D (input/output) REAL array, dimension ( NL+NR+1 ). On entry D(1:NL,1:NL) contains the singular values of the upper block, and D(NL+2:N) contains the singular values of the lower block. On exit D(1:N) contains the singular values of the modified matrix. - VF (input/output) DOUBLE PRECISION array, dimension ( M ) + VF (input/output) REAL array, dimension ( M ) On entry, VF(1:NL+1) contains the first components of all right singular vectors of the upper block; and VF(NL+2:M) contains the first components of all right singular vectors of the lower block. On exit, VF contains the first components of all right singular vectors of the bidiagonal matrix. 
- VL (input/output) DOUBLE PRECISION array, dimension ( M ) + VL (input/output) REAL array, dimension ( M ) On entry, VL(1:NL+1) contains the last components of all right singular vectors of the upper block; and VL(NL+2:M) contains the last components of all right singular vectors of the lower block. On exit, VL contains the last components of all right singular vectors of the bidiagonal matrix. - ALPHA (input) DOUBLE PRECISION + ALPHA (input) REAL Contains the diagonal element associated with the added row. - BETA (input) DOUBLE PRECISION + BETA (input) REAL Contains the off-diagonal element associated with the added row. @@ -22425,25 +82376,25 @@ L240: LDGCOL (input) INTEGER leading dimension of GIVCOL, must be at least N. - GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) Each number indicates the C or S value to be used in the corresponding Givens rotation. Not referenced if ICOMPQ = 0. LDGNUM (input) INTEGER The leading dimension of GIVNUM and POLES, must be at least N. - POLES (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + POLES (output) REAL array, dimension ( LDGNUM, 2 ) On exit, POLES(1,*) is an array containing the new singular values obtained from solving the secular equation, and POLES(2,*) is an array containing the poles in the secular equation. Not referenced if ICOMPQ = 0. - DIFL (output) DOUBLE PRECISION array, dimension ( N ) + DIFL (output) REAL array, dimension ( N ) On exit, DIFL(I) is the distance between I-th updated (undeflated) singular value and the I-th (undeflated) old singular value. - DIFR (output) DOUBLE PRECISION array, + DIFR (output) REAL array, dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and dimension ( N ) if ICOMPQ = 0. On exit, DIFR(I, 1) is the distance between I-th updated @@ -22453,9 +82404,9 @@ L240: If ICOMPQ = 1, DIFR(1:K,2) is an array containing the normalizing factors for the right singular vector matrix. 
- See DLASD8 for details on DIFL and DIFR. + See SLASD8 for details on DIFL and DIFR. - Z (output) DOUBLE PRECISION array, dimension ( M ) + Z (output) REAL array, dimension ( M ) The first elements of this array contain the components of the deflation-adjusted updating row vector. @@ -22463,15 +82414,15 @@ L240: Contains the dimension of the non-deflated matrix, This is the order of the related secular equation. 1 <= K <=N. - C (output) DOUBLE PRECISION + C (output) REAL C contains garbage if SQRE =0 and the C-value of a Givens rotation related to the right null space if SQRE = 1. - S (output) DOUBLE PRECISION + S (output) REAL S contains garbage if SQRE =0 and the S-value of a Givens rotation related to the right null space if SQRE = 1. - WORK (workspace) DOUBLE PRECISION array, dimension ( 4 * M ) + WORK (workspace) REAL array, dimension ( 4 * M ) IWORK (workspace) INTEGER array, dimension ( 3 * N ) @@ -22534,14 +82485,14 @@ L240: } if (*info != 0) { i__1 = -(*info); - xerbla_("DLASD6", &i__1); + xerbla_("SLASD6", &i__1); return 0; } /* The following values are for bookkeeping purposes only. They are integer pointers which indicate the portion of the workspace - used by a particular array in DLASD7 and DLASD8. + used by a particular array in SLASD7 and SLASD8. */ isigma = 1; @@ -22558,23 +82509,23 @@ L240: Computing MAX */ - d__1 = abs(*alpha), d__2 = abs(*beta); - orgnrm = max(d__1,d__2); - d__[*nl + 1] = 0.; + r__1 = dabs(*alpha), r__2 = dabs(*beta); + orgnrm = dmax(r__1,r__2); + d__[*nl + 1] = 0.f; i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { - if ((d__1 = d__[i__], abs(d__1)) > orgnrm) { - orgnrm = (d__1 = d__[i__], abs(d__1)); + if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) { + orgnrm = (r__1 = d__[i__], dabs(r__1)); } /* L10: */ } - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &n, &c__1, &d__[1], &n, info); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &n, &c__1, &d__[1], &n, info); *alpha /= orgnrm; *beta /= orgnrm; /* Sort and Deflate singular values. 
*/ - dlasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & + slasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], & work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], & iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[ givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s, @@ -22582,60 +82533,58 @@ L240: /* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */ - dlasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], + slasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1], ldgnum, &work[isigma], &work[iw], info); /* Save the poles if ICOMPQ = 1. */ if (*icompq == 1) { - dcopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); - dcopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1); + scopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1); + scopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1); } /* Unscale. */ - dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &n, &c__1, &d__[1], &n, info); + slascl_("G", &c__0, &c__0, &c_b871, &orgnrm, &n, &c__1, &d__[1], &n, info); /* Prepare the IDXQ sorting permutation. 
*/ n1 = *k; n2 = n - *k; - dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); + slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]); return 0; -/* End of DLASD6 */ +/* End of SLASD6 */ -} /* dlasd6_ */ +} /* slasd6_ */ -/* Subroutine */ int dlasd7_(integer *icompq, integer *nl, integer *nr, - integer *sqre, integer *k, doublereal *d__, doublereal *z__, - doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl, - doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal * - dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm, - integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum, - integer *ldgnum, doublereal *c__, doublereal *s, integer *info) +/* Subroutine */ int slasd7_(integer *icompq, integer *nl, integer *nr, + integer *sqre, integer *k, real *d__, real *z__, real *zw, real *vf, + real *vfw, real *vl, real *vlw, real *alpha, real *beta, real *dsigma, + integer *idx, integer *idxp, integer *idxq, integer *perm, integer * + givptr, integer *givcol, integer *ldgcol, real *givnum, integer * + ldgnum, real *c__, real *s, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1; - doublereal d__1, d__2; + real r__1, r__2; /* Local variables */ static integer i__, j, m, n, k2; - static doublereal z1; + static real z1; static integer jp; - static doublereal eps, tau, tol; + static real eps, tau, tol; static integer nlp1, nlp2, idxi, idxj; - extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static integer idxjp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer jprev; - - extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *, - integer *, integer *, integer *), xerbla_(char *, integer *); - static doublereal hlftol; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, 
real *, real *); + static integer idxjp, jprev; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slapy2_(real *, real *), slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_( + integer *, integer *, real *, integer *, integer *, integer *); + static real hlftol; /* @@ -22648,14 +82597,14 @@ L240: Purpose ======= - DLASD7 merges the two sets of singular values together into a single + SLASD7 merges the two sets of singular values together into a single sorted set. Then it tries to deflate the size of the problem. There are two ways in which deflation can occur: when two or more singular values are close together or if there is a tiny entry in the Z vector. For each such occurrence the order of the related secular equation problem is reduced by one. - DLASD7 is called from DLASD6. + SLASD7 is called from SLASD6. Arguments ========= @@ -22685,47 +82634,47 @@ L240: Contains the dimension of the non-deflated matrix, this is the order of the related secular equation. 1 <= K <=N. - D (input/output) DOUBLE PRECISION array, dimension ( N ) + D (input/output) REAL array, dimension ( N ) On entry D contains the singular values of the two submatrices to be combined. On exit D contains the trailing (N-K) updated singular values (those which were deflated) sorted into increasing order. - Z (output) DOUBLE PRECISION array, dimension ( M ) + Z (output) REAL array, dimension ( M ) On exit Z contains the updating row vector in the secular equation. - ZW (workspace) DOUBLE PRECISION array, dimension ( M ) + ZW (workspace) REAL array, dimension ( M ) Workspace for Z. - VF (input/output) DOUBLE PRECISION array, dimension ( M ) + VF (input/output) REAL array, dimension ( M ) On entry, VF(1:NL+1) contains the first components of all right singular vectors of the upper block; and VF(NL+2:M) contains the first components of all right singular vectors of the lower block. 
On exit, VF contains the first components of all right singular vectors of the bidiagonal matrix. - VFW (workspace) DOUBLE PRECISION array, dimension ( M ) + VFW (workspace) REAL array, dimension ( M ) Workspace for VF. - VL (input/output) DOUBLE PRECISION array, dimension ( M ) + VL (input/output) REAL array, dimension ( M ) On entry, VL(1:NL+1) contains the last components of all right singular vectors of the upper block; and VL(NL+2:M) contains the last components of all right singular vectors of the lower block. On exit, VL contains the last components of all right singular vectors of the bidiagonal matrix. - VLW (workspace) DOUBLE PRECISION array, dimension ( M ) + VLW (workspace) REAL array, dimension ( M ) Workspace for VL. - ALPHA (input) DOUBLE PRECISION + ALPHA (input) REAL Contains the diagonal element associated with the added row. - BETA (input) DOUBLE PRECISION + BETA (input) REAL Contains the off-diagonal element associated with the added row. - DSIGMA (output) DOUBLE PRECISION array, dimension ( N ) + DSIGMA (output) REAL array, dimension ( N ) Contains a copy of the diagonal elements (K-1 singular values and one zero) in the secular equation. @@ -22761,18 +82710,18 @@ L240: LDGCOL (input) INTEGER The leading dimension of GIVCOL, must be at least N. - GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) + GIVNUM (output) REAL array, dimension ( LDGNUM, 2 ) Each number indicates the C or S value to be used in the corresponding Givens rotation. Not referenced if ICOMPQ = 0. LDGNUM (input) INTEGER The leading dimension of GIVNUM, must be at least N. - C (output) DOUBLE PRECISION + C (output) REAL C contains garbage if SQRE =0 and the C-value of a Givens rotation related to the right null space if SQRE = 1. - S (output) DOUBLE PRECISION + S (output) REAL S contains garbage if SQRE =0 and the S-value of a Givens rotation related to the right null space if SQRE = 1. 
@@ -22833,7 +82782,7 @@ L240: } if (*info != 0) { i__1 = -(*info); - xerbla_("DLASD7", &i__1); + xerbla_("SLASD7", &i__1); return 0; } @@ -22849,11 +82798,11 @@ L240: */ z1 = *alpha * vl[nlp1]; - vl[nlp1] = 0.; + vl[nlp1] = 0.f; tau = vf[nlp1]; for (i__ = *nl; i__ >= 1; --i__) { z__[i__ + 1] = *alpha * vl[i__]; - vl[i__] = 0.; + vl[i__] = 0.f; vf[i__ + 1] = vf[i__]; d__[i__ + 1] = d__[i__]; idxq[i__ + 1] = idxq[i__] + 1; @@ -22866,7 +82815,7 @@ L240: i__1 = m; for (i__ = nlp2; i__ <= i__1; ++i__) { z__[i__] = *beta * vf[i__]; - vf[i__] = 0.; + vf[i__] = 0.f; /* L20: */ } @@ -22889,7 +82838,7 @@ L240: /* L40: */ } - dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); + slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { @@ -22903,13 +82852,13 @@ L240: /* Calculate the allowable deflation tolerence */ - eps = EPSILON; + eps = slamch_("Epsilon"); /* Computing MAX */ - d__1 = abs(*alpha), d__2 = abs(*beta); - tol = max(d__1,d__2); + r__1 = dabs(*alpha), r__2 = dabs(*beta); + tol = dmax(r__1,r__2); /* Computing MAX */ - d__2 = (d__1 = d__[n], abs(d__1)); - tol = eps * 64. * max(d__2,tol); + r__2 = (r__1 = d__[n], dabs(r__1)); + tol = eps * 64.f * dmax(r__2,tol); /* There are 2 kinds of deflation -- first a value in the z-vector @@ -22936,7 +82885,7 @@ L240: k2 = n + 1; i__1 = n; for (j = 2; j <= i__1; ++j) { - if ((d__1 = z__[j], abs(d__1)) <= tol) { + if ((r__1 = z__[j], dabs(r__1)) <= tol) { /* Deflate due to small z component. */ @@ -22958,7 +82907,7 @@ L80: if (j > n) { goto L90; } - if ((d__1 = z__[j], abs(d__1)) <= tol) { + if ((r__1 = z__[j], dabs(r__1)) <= tol) { /* Deflate due to small z component. */ @@ -22968,7 +82917,7 @@ L80: /* Check if singular values are close enough to allow deflation. */ - if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) { + if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) { /* Deflation is possible. */ @@ -22980,9 +82929,9 @@ L80: destructive underflow. 
*/ - tau = dlapy2_(c__, s); + tau = slapy2_(c__, s); z__[j] = tau; - z__[jprev] = 0.; + z__[jprev] = 0.f; *c__ /= tau; *s = -(*s) / tau; @@ -23003,8 +82952,8 @@ L80: givnum[*givptr + (givnum_dim1 << 1)] = *c__; givnum[*givptr + givnum_dim1] = *s; } - drot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); - drot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); + srot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s); + srot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s); --k2; idxp[k2] = jprev; jprev = j; @@ -23060,32 +83009,32 @@ L100: */ i__1 = n - *k; - dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); + scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1); /* Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and VL(M). */ - dsigma[1] = 0.; - hlftol = tol / 2.; - if (abs(dsigma[2]) <= hlftol) { + dsigma[1] = 0.f; + hlftol = tol / 2.f; + if (dabs(dsigma[2]) <= hlftol) { dsigma[2] = hlftol; } if (m > n) { - z__[1] = dlapy2_(&z1, &z__[m]); + z__[1] = slapy2_(&z1, &z__[m]); if (z__[1] <= tol) { - *c__ = 1.; - *s = 0.; + *c__ = 1.f; + *s = 0.f; z__[1] = tol; } else { *c__ = z1 / z__[1]; *s = -z__[m] / z__[1]; } - drot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); - drot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); + srot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s); + srot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s); } else { - if (abs(z1) <= tol) { + if (dabs(z1) <= tol) { z__[1] = tol; } else { z__[1] = z1; @@ -23095,51 +83044,48 @@ L100: /* Restore Z, VF, and VL. 
*/ i__1 = *k - 1; - dcopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); + scopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1); i__1 = n - 1; - dcopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); + scopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1); i__1 = n - 1; - dcopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); + scopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1); return 0; -/* End of DLASD7 */ +/* End of SLASD7 */ -} /* dlasd7_ */ +} /* slasd7_ */ -/* Subroutine */ int dlasd8_(integer *icompq, integer *k, doublereal *d__, - doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, - doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * - work, integer *info) +/* Subroutine */ int slasd8_(integer *icompq, integer *k, real *d__, real * + z__, real *vf, real *vl, real *difl, real *difr, integer *lddifr, + real *dsigma, real *work, integer *info) { /* System generated locals */ integer difr_dim1, difr_offset, i__1, i__2; - doublereal d__1, d__2; + real r__1, r__2; /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); + double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ static integer i__, j; - static doublereal dj, rho; + static real dj, rho; static integer iwk1, iwk2, iwk3; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - static doublereal temp; - extern doublereal dnrm2_(integer *, doublereal *, integer *); + static real temp; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); static integer iwk2i, iwk3i; - static doublereal diflj, difrj, dsigj; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - extern doublereal dlamc3_(doublereal *, doublereal *); - extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *), dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal 
*, - integer *, integer *), dlaset_(char *, integer *, integer - *, doublereal *, doublereal *, doublereal *, integer *), - xerbla_(char *, integer *); - static doublereal dsigjp; + extern doublereal snrm2_(integer *, real *, integer *); + static real diflj, difrj, dsigj; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, + real *, real *, real *, real *, integer *), xerbla_(char *, + integer *); + static real dsigjp; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); /* @@ -23152,14 +83098,14 @@ L100: Purpose ======= - DLASD8 finds the square roots of the roots of the secular equation, + SLASD8 finds the square roots of the roots of the secular equation, as defined by the values in DSIGMA and Z. It makes the appropriate - calls to DLASD4, and stores, for each element in D, the distance + calls to SLASD4, and stores, for each element in D, the distance to its two nearest poles (elements in DSIGMA). It also updates the arrays VF and VL, the first and last components of all the right singular vectors of the original bidiagonal matrix. - DLASD8 is called from DLASD6. + SLASD8 is called from SLASD6. Arguments ========= @@ -23172,31 +83118,31 @@ L100: K (input) INTEGER The number of terms in the rational function to be solved - by DLASD4. K >= 1. + by SLASD4. K >= 1. - D (output) DOUBLE PRECISION array, dimension ( K ) + D (output) REAL array, dimension ( K ) On output, D contains the updated singular values. - Z (input) DOUBLE PRECISION array, dimension ( K ) + Z (input) REAL array, dimension ( K ) The first K elements of this array contain the components of the deflation-adjusted updating row vector. 
- VF (input/output) DOUBLE PRECISION array, dimension ( K ) + VF (input/output) REAL array, dimension ( K ) On entry, VF contains information passed through DBEDE8. On exit, VF contains the first K components of the first components of all right singular vectors of the bidiagonal matrix. - VL (input/output) DOUBLE PRECISION array, dimension ( K ) + VL (input/output) REAL array, dimension ( K ) On entry, VL contains information passed through DBEDE8. On exit, VL contains the first K components of the last components of all right singular vectors of the bidiagonal matrix. - DIFL (output) DOUBLE PRECISION array, dimension ( K ) + DIFL (output) REAL array, dimension ( K ) On exit, DIFL(I) = D(I) - DSIGMA(I). - DIFR (output) DOUBLE PRECISION array, + DIFR (output) REAL array, dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and dimension ( K ) if ICOMPQ = 0. On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not @@ -23208,12 +83154,12 @@ L100: LDDIFR (input) INTEGER The leading dimension of DIFR, must be at least K. - DSIGMA (input) DOUBLE PRECISION array, dimension ( K ) + DSIGMA (input) REAL array, dimension ( K ) The first K elements of this array contain the old roots of the deflated updating problem. These are the poles of the secular equation. - WORK (workspace) DOUBLE PRECISION array, dimension at least 3 * K + WORK (workspace) REAL array, dimension at least 3 * K INFO (output) INTEGER = 0: successful exit. 
@@ -23257,18 +83203,18 @@ L100: } if (*info != 0) { i__1 = -(*info); - xerbla_("DLASD8", &i__1); + xerbla_("SLASD8", &i__1); return 0; } /* Quick return if possible */ if (*k == 1) { - d__[1] = abs(z__[1]); + d__[1] = dabs(z__[1]); difl[1] = d__[1]; if (*icompq == 1) { - difl[2] = 1.; - difr[(difr_dim1 << 1) + 1] = 1.; + difl[2] = 1.f; + difr[(difr_dim1 << 1) + 1] = 1.f; } return 0; } @@ -23294,7 +83240,7 @@ L100: i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { - dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; + dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; /* L10: */ } @@ -23308,13 +83254,13 @@ L100: /* Normalize Z. */ - rho = dnrm2_(k, &z__[1], &c__1); - dlascl_("G", &c__0, &c__0, &rho, &c_b15, k, &c__1, &z__[1], k, info); + rho = snrm2_(k, &z__[1], &c__1); + slascl_("G", &c__0, &c__0, &rho, &c_b871, k, &c__1, &z__[1], k, info); rho *= rho; /* Initialize WORK(IWK3). */ - dlaset_("A", k, &c__1, &c_b15, &c_b15, &work[iwk3], k); + slaset_("A", k, &c__1, &c_b871, &c_b871, &work[iwk3], k); /* Compute the updated singular values, the arrays DIFL, DIFR, @@ -23323,7 +83269,7 @@ L100: i__1 = *k; for (j = 1; j <= i__1; ++j) { - dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ + slasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ iwk2], info); /* If the root finder fails, the computation is terminated. 
*/ @@ -23355,8 +83301,8 @@ L100: i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { - d__2 = sqrt((d__1 = work[iwk3i + i__], abs(d__1))); - z__[i__] = d_sign(&d__2, &z__[i__]); + r__2 = sqrt((r__1 = work[iwk3i + i__], dabs(r__1))); + z__[i__] = r_sign(&r__2, &z__[i__]); /* L50: */ } @@ -23374,40 +83320,39 @@ L100: work[j] = -z__[j] / diflj / (dsigma[j] + dj); i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigj) - diflj) / ( + work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigj) - diflj) / ( dsigma[i__] + dj); /* L60: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { - work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigjp) + difrj) / + work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigjp) + difrj) / (dsigma[i__] + dj); /* L70: */ } - temp = dnrm2_(k, &work[1], &c__1); - work[iwk2i + j] = ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; - work[iwk3i + j] = ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; + temp = snrm2_(k, &work[1], &c__1); + work[iwk2i + j] = sdot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; + work[iwk3i + j] = sdot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; if (*icompq == 1) { difr[j + (difr_dim1 << 1)] = temp; } /* L80: */ } - dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); - dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); + scopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); + scopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); return 0; -/* End of DLASD8 */ +/* End of SLASD8 */ -} /* dlasd8_ */ +} /* slasd8_ */ -/* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n, - integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer - *ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr, - doublereal *z__, doublereal *poles, integer *givptr, integer *givcol, - integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__, - doublereal *s, doublereal *work, integer *iwork, integer *info) +/* Subroutine */ int slasda_(integer *icompq, integer *smlsiz, integer 
*n, + integer *sqre, real *d__, real *e, real *u, integer *ldu, real *vt, + integer *k, real *difl, real *difr, real *z__, real *poles, integer * + givptr, integer *givcol, integer *ldgcol, integer *perm, real *givnum, + real *c__, real *s, real *work, integer *iwork, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1, @@ -23421,27 +83366,24 @@ L100: /* Local variables */ static integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1; - static doublereal beta; + static real beta; static integer idxq, nlvl; - static doublereal alpha; - static integer inode, ndiml, ndimr, idxqi, itemp; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer sqrei; - extern /* Subroutine */ int dlasd6_(integer *, integer *, integer *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, integer *, integer *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, doublereal *, - doublereal *, integer *, integer *); + static real alpha; + static integer inode, ndiml, ndimr, idxqi, itemp, sqrei; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slasd6_(integer *, integer *, integer *, integer *, + real *, real *, real *, real *, real *, integer *, integer *, + integer *, integer *, integer *, real *, integer *, real *, real * + , real *, real *, integer *, real *, real *, real *, integer *, + integer *); static integer nwork1, nwork2; - extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer - *, integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *), dlasdt_(integer *, integer *, - integer *, integer *, integer *, integer *, 
integer *), dlaset_( - char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *), slasdq_( + char *, integer *, integer *, integer *, integer *, integer *, + real *, real *, real *, integer *, real *, integer *, real *, + integer *, real *, integer *), slasdt_(integer *, integer + *, integer *, integer *, integer *, integer *, integer *), + slaset_(char *, integer *, integer *, real *, real *, real *, + integer *); static integer smlszp; @@ -23455,14 +83397,14 @@ L100: Purpose ======= - Using a divide and conquer approach, DLASDA computes the singular + Using a divide and conquer approach, SLASDA computes the singular value decomposition (SVD) of a real upper bidiagonal N-by-M matrix B with diagonal D and offdiagonal E, where M = N + SQRE. The algorithm computes the singular values in the SVD B = U * S * VT. The orthogonal matrices U and VT are optionally computed in compact form. - A related subroutine, DLASD0, computes the singular values and + A related subroutine, SLASD0, computes the singular values and the singular vectors in explicit form. Arguments @@ -23488,15 +83430,15 @@ L100: = 0: The bidiagonal matrix has column dimension M = N; = 1: The bidiagonal matrix has column dimension M = N + 1. - D (input/output) DOUBLE PRECISION array, dimension ( N ) + D (input/output) REAL array, dimension ( N ) On entry D contains the main diagonal of the bidiagonal matrix. On exit D, if INFO = 0, contains its singular values. - E (input) DOUBLE PRECISION array, dimension ( M-1 ) + E (input) REAL array, dimension ( M-1 ) Contains the subdiagonal entries of the bidiagonal matrix. On exit, E has been destroyed. - U (output) DOUBLE PRECISION array, + U (output) REAL array, dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. 
If ICOMPQ = 1, on exit, U contains the left singular vector matrices of all subproblems at the bottom @@ -23506,7 +83448,7 @@ L100: The leading dimension of arrays U, VT, DIFL, DIFR, POLES, GIVNUM, and Z. - VT (output) DOUBLE PRECISION array, + VT (output) REAL array, dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right singular vector matrices of all subproblems at the bottom @@ -23517,26 +83459,26 @@ L100: If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th secular equation on the computation tree. - DIFL (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), + DIFL (output) REAL array, dimension ( LDU, NLVL ), where NLVL = floor(log_2 (N/SMLSIZ))). - DIFR (output) DOUBLE PRECISION array, + DIFR (output) REAL array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and dimension ( N ) if ICOMPQ = 0. If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) record distances between singular values on the I-th level and singular values on the (I -1)-th level, and DIFR(1:N, 2 * I ) contains the normalizing factors for - the right singular vector matrix. See DLASD8 for details. + the right singular vector matrix. See SLASD8 for details. - Z (output) DOUBLE PRECISION array, + Z (output) REAL array, dimension ( LDU, NLVL ) if ICOMPQ = 1 and dimension ( N ) if ICOMPQ = 0. The first K elements of Z(1, I) contain the components of the deflation-adjusted updating row vector for subproblems on the I-th level. - POLES (output) DOUBLE PRECISION array, + POLES (output) REAL array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and POLES(1, 2*I) contain the new and old singular values @@ -23563,26 +83505,26 @@ L100: if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records permutations done on the I-th level of the computation tree. 
- GIVNUM (output) DOUBLE PRECISION array, + GIVNUM (output) REAL array, dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- values of Givens rotations performed on the I-th level on the computation tree. - C (output) DOUBLE PRECISION array, + C (output) REAL array, dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 and the I-th subproblem is not square, on exit, C( I ) contains the C-value of a Givens rotation related to the right null space of the I-th subproblem. - S (output) DOUBLE PRECISION array, dimension ( N ) if + S (output) REAL array, dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 and the I-th subproblem is not square, on exit, S( I ) contains the S-value of a Givens rotation related to the right null space of the I-th subproblem. - WORK (workspace) DOUBLE PRECISION array, dimension + WORK (workspace) REAL array, dimension (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). IWORK (workspace) INTEGER array. @@ -23661,21 +83603,21 @@ L100: } if (*info != 0) { i__1 = -(*info); - xerbla_("DLASDA", &i__1); + xerbla_("SLASDA", &i__1); return 0; } m = *n + *sqre; -/* If the input matrix is too small, call DLASDQ to find the SVD. */ +/* If the input matrix is too small, call SLASDQ to find the SVD. 
*/ if (*n <= *smlsiz) { if (*icompq == 0) { - dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ + slasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[ vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, & work[1], info); } else { - dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] + slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset] , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info); } @@ -23699,12 +83641,12 @@ L100: nwork1 = vl + m; nwork2 = nwork1 + smlszp * smlszp; - dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], + slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr], smlsiz); /* for the nodes on bottom level of the tree, solve - their subproblems by DLASDQ. + their subproblems by SLASDQ. */ ndb1 = (nd + 1) / 2; @@ -23731,22 +83673,23 @@ L100: vli = vl + nlf - 1; sqrei = 1; if (*icompq == 0) { - dlaset_("A", &nlp1, &nlp1, &c_b29, &c_b15, &work[nwork1], &smlszp); - dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & + slaset_("A", &nlp1, &nlp1, &c_b1101, &c_b871, &work[nwork1], & + smlszp); + slasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], & work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2], &nl, &work[nwork2], info); itemp = nwork1 + nl * smlszp; - dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); - dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); + scopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1); + scopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1); } else { - dlaset_("A", &nl, &nl, &c_b29, &c_b15, &u[nlf + u_dim1], ldu); - dlaset_("A", &nlp1, &nlp1, &c_b29, &c_b15, &vt[nlf + vt_dim1], + slaset_("A", &nl, &nl, &c_b1101, &c_b871, &u[nlf + u_dim1], ldu); + slaset_("A", &nlp1, &nlp1, &c_b1101, &c_b871, &vt[nlf + vt_dim1], ldu); - dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & + slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], & vt[nlf + vt_dim1], ldu, &u[nlf + 
u_dim1], ldu, &u[nlf + u_dim1], ldu, &work[nwork1], info); - dcopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); - dcopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) + scopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1); + scopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1) ; } if (*info != 0) { @@ -23767,22 +83710,23 @@ L100: vli += nlp1; nrp1 = nr + sqrei; if (*icompq == 0) { - dlaset_("A", &nrp1, &nrp1, &c_b29, &c_b15, &work[nwork1], &smlszp); - dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & + slaset_("A", &nrp1, &nrp1, &c_b1101, &c_b871, &work[nwork1], & + smlszp); + slasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], & work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2], &nr, &work[nwork2], info); itemp = nwork1 + (nrp1 - 1) * smlszp; - dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); - dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); + scopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1); + scopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1); } else { - dlaset_("A", &nr, &nr, &c_b29, &c_b15, &u[nrf + u_dim1], ldu); - dlaset_("A", &nrp1, &nrp1, &c_b29, &c_b15, &vt[nrf + vt_dim1], + slaset_("A", &nr, &nr, &c_b1101, &c_b871, &u[nrf + u_dim1], ldu); + slaset_("A", &nrp1, &nrp1, &c_b1101, &c_b871, &vt[nrf + vt_dim1], ldu); - dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & + slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], & vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf + u_dim1], ldu, &work[nwork1], info); - dcopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); - dcopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) + scopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1); + scopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1) ; } if (*info != 0) { @@ -23834,7 +83778,7 @@ L100: alpha = d__[ic]; beta = e[ic]; if (*icompq == 0) { - dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], 
&work[vfi], & + slasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & work[vli], &alpha, &beta, &iwork[idxqi], &perm[ perm_offset], &givptr[1], &givcol[givcol_offset], ldgcol, &givnum[givnum_offset], ldu, &poles[ @@ -23843,7 +83787,7 @@ L100: &iwork[iwk], info); } else { --j; - dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & + slasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], & work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf + lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, &givnum[nlf + lvl2 * @@ -23862,14 +83806,14 @@ L100: return 0; -/* End of DLASDA */ +/* End of SLASDA */ -} /* dlasda_ */ +} /* slasda_ */ -/* Subroutine */ int dlasdq_(char *uplo, integer *sqre, integer *n, integer * - ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e, - doublereal *vt, integer *ldvt, doublereal *u, integer *ldu, - doublereal *c__, integer *ldc, doublereal *work, integer *info) +/* Subroutine */ int slasdq_(char *uplo, integer *sqre, integer *n, integer * + ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt, + integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real * + work, integer *info) { /* System generated locals */ integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1, @@ -23877,22 +83821,21 @@ L100: /* Local variables */ static integer i__, j; - static doublereal r__, cs, sn; + static real r__, cs, sn; static integer np1, isub; - static doublereal smin; + static real smin; static integer sqre1; extern logical lsame_(char *, char *); - extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer * - , doublereal *, integer *); + extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, + integer *, real *, real *, real *, integer *); static integer iuplo; - extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, - doublereal *, 
doublereal *, doublereal *), xerbla_(char *, - integer *), dbdsqr_(char *, integer *, integer *, integer - *, integer *, doublereal *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *); + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), xerbla_(char *, integer *), slartg_(real *, + real *, real *, real *, real *); static logical rotate; + extern /* Subroutine */ int sbdsqr_(char *, integer *, integer *, integer + *, integer *, real *, real *, real *, integer *, real *, integer * + , real *, integer *, real *, integer *); /* @@ -23905,7 +83848,7 @@ L100: Purpose ======= - DLASDQ computes the singular value decomposition (SVD) of a real + SLASDQ computes the singular value decomposition (SVD) of a real (upper or lower) bidiagonal matrix with diagonal D and offdiagonal E, accumulating the transformations if desired. Letting B denote the input bidiagonal matrix, the algorithm computes orthogonal @@ -23955,12 +83898,12 @@ L100: On entry, NCC specifies the number of columns of the matrix C. NCC must be at least 0. - D (input/output) DOUBLE PRECISION array, dimension (N) + D (input/output) REAL array, dimension (N) On entry, D contains the diagonal entries of the bidiagonal matrix whose SVD is desired. On normal exit, D contains the singular values in ascending order. - E (input/output) DOUBLE PRECISION array. + E (input/output) REAL array. dimension is (N-1) if SQRE = 0 and N if SQRE = 1. On entry, the entries of E contain the offdiagonal entries of the bidiagonal matrix whose SVD is desired. On normal @@ -23969,7 +83912,7 @@ L100: of a bidiagonal matrix orthogonally equivalent to the one given as input. 
- VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) + VT (input/output) REAL array, dimension (LDVT, NCVT) On entry, contains a matrix which on exit has been premultiplied by P', dimension N-by-NCVT if SQRE = 0 and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). @@ -23979,7 +83922,7 @@ L100: declared in the calling (sub) program. LDVT must be at least 1. If NCVT is nonzero LDVT must also be at least N. - U (input/output) DOUBLE PRECISION array, dimension (LDU, N) + U (input/output) REAL array, dimension (LDU, N) On entry, contains a matrix which on exit has been postmultiplied by Q, dimension NRU-by-N if SQRE = 0 and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). @@ -23989,7 +83932,7 @@ L100: declared in the calling (sub) program. LDU must be at least max( 1, NRU ) . - C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) + C (input/output) REAL array, dimension (LDC, NCC) On entry, contains an N-by-NCC matrix which on exit has been premultiplied by Q' dimension N-by-NCC if SQRE = 0 and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). @@ -23999,7 +83942,7 @@ L100: declared in the calling (sub) program. LDC must be at least 1. If NCC is nonzero, LDC must also be at least N. - WORK (workspace) DOUBLE PRECISION array, dimension (4*N) + WORK (workspace) REAL array, dimension (4*N) Workspace. Only referenced if one of NCVT, NRU, or NCC is nonzero, and if N is at least 2. 
@@ -24066,7 +84009,7 @@ L100: } if (*info != 0) { i__1 = -(*info); - xerbla_("DLASDQ", &i__1); + xerbla_("SLASDQ", &i__1); return 0; } if (*n == 0) { @@ -24087,7 +84030,7 @@ L100: if (iuplo == 1 && sqre1 == 1) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; @@ -24097,9 +84040,9 @@ L100: } /* L10: */ } - dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + slartg_(&d__[*n], &e[*n], &cs, &sn, &r__); d__[*n] = r__; - e[*n] = 0.; + e[*n] = 0.f; if (rotate) { work[*n] = cs; work[*n + *n] = sn; @@ -24110,7 +84053,7 @@ L100: /* Update singular vectors if desired. */ if (*ncvt > 0) { - dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ + slasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[ vt_offset], ldvt); } } @@ -24123,7 +84066,7 @@ L100: if (iuplo == 2) { i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { - dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__); + slartg_(&d__[i__], &e[i__], &cs, &sn, &r__); d__[i__] = r__; e[i__] = sn * d__[i__ + 1]; d__[i__ + 1] = cs * d__[i__ + 1]; @@ -24140,7 +84083,7 @@ L100: */ if (sqre1 == 1) { - dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__); + slartg_(&d__[*n], &e[*n], &cs, &sn, &r__); d__[*n] = r__; if (rotate) { work[*n] = cs; @@ -24152,30 +84095,30 @@ L100: if (*nru > 0) { if (sqre1 == 0) { - dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ + slasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[ u_offset], ldu); } else { - dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ + slasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[ u_offset], ldu); } } if (*ncc > 0) { if (sqre1 == 0) { - dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ + slasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[ c_offset], ldc); } else { - dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[ + slasr_("L", "V", "F", &np1, ncc, &work[1], 
&work[np1], &c__[ c_offset], ldc); } } } /* - Call DBDSQR to compute the SVD of the reduced real + Call SBDSQR to compute the SVD of the reduced real N-by-N upper bidiagonal matrix. */ - dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ + sbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[ u_offset], ldu, &c__[c_offset], ldc, &work[1], info); /* @@ -24205,15 +84148,15 @@ L100: d__[isub] = d__[i__]; d__[i__] = smin; if (*ncvt > 0) { - dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], + sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1], ldvt); } if (*nru > 0) { - dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] + sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1] , &c__1); } if (*ncc > 0) { - dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) + sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc) ; } } @@ -24222,11 +84165,11 @@ L100: return 0; -/* End of DLASDQ */ +/* End of SLASDQ */ -} /* dlasdq_ */ +} /* slasdq_ */ -/* Subroutine */ int dlasdt_(integer *n, integer *lvl, integer *nd, integer * +/* Subroutine */ int slasdt_(integer *n, integer *lvl, integer *nd, integer * inode, integer *ndiml, integer *ndimr, integer *msub) { /* System generated locals */ @@ -24237,7 +84180,7 @@ L100: /* Local variables */ static integer i__, il, ir, maxn; - static doublereal temp; + static real temp; static integer nlvl, llst, ncrnt; @@ -24245,13 +84188,13 @@ L100: -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + October 31, 1999 Purpose ======= - DLASDT creates a tree of subproblems for bidiagonal divide and + SLASDT creates a tree of subproblems for bidiagonal divide and conquer. 
Arguments @@ -24300,7 +84243,7 @@ L100: /* Function Body */ maxn = max(1,*n); - temp = log((doublereal) maxn / (doublereal) (*msub + 1)) / log(2.); + temp = log((real) maxn / (real) (*msub + 1)) / log(2.f); *lvl = (integer) temp + 1; i__ = *n / 2; @@ -24338,12 +84281,12 @@ L100: return 0; -/* End of DLASDT */ +/* End of SLASDT */ -} /* dlasdt_ */ +} /* slasdt_ */ -/* Subroutine */ int dlaset_(char *uplo, integer *m, integer *n, doublereal * - alpha, doublereal *beta, doublereal *a, integer *lda) +/* Subroutine */ int slaset_(char *uplo, integer *m, integer *n, real *alpha, + real *beta, real *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; @@ -24363,7 +84306,7 @@ L100: Purpose ======= - DLASET initializes an m-by-n matrix A to BETA on the diagonal and + SLASET initializes an m-by-n matrix A to BETA on the diagonal and ALPHA on the offdiagonals. Arguments @@ -24383,13 +84326,13 @@ L100: N (input) INTEGER The number of columns of the matrix A. N >= 0. - ALPHA (input) DOUBLE PRECISION + ALPHA (input) REAL The constant to which the offdiagonal elements are to be set. - BETA (input) DOUBLE PRECISION + BETA (input) REAL The constant to which the diagonal elements are to be set. 
- A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) On exit, the leading m-by-n submatrix of A is set as follows: if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, @@ -24472,39 +84415,36 @@ L100: return 0; -/* End of DLASET */ +/* End of SLASET */ -} /* dlaset_ */ +} /* slaset_ */ -/* Subroutine */ int dlasq1_(integer *n, doublereal *d__, doublereal *e, - doublereal *work, integer *info) +/* Subroutine */ int slasq1_(integer *n, real *d__, real *e, real *work, + integer *info) { /* System generated locals */ integer i__1, i__2; - doublereal d__1, d__2, d__3; + real r__1, r__2, r__3; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer i__; - static doublereal eps; - extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal - *, doublereal *, doublereal *); - static doublereal scale; + static real eps; + extern /* Subroutine */ int slas2_(real *, real *, real *, real *, real *) + ; + static real scale; static integer iinfo; - static doublereal sigmn; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static doublereal sigmx; - extern /* Subroutine */ int dlasq2_(integer *, doublereal *, integer *); - - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *); - static doublereal safmin; - extern /* Subroutine */ int xerbla_(char *, integer *), dlasrt_( - char *, integer *, doublereal *, integer *); + static real sigmn, sigmx; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), slasq2_(integer *, real *, integer *); + extern doublereal slamch_(char *); + static real safmin; + extern /* Subroutine */ int xerbla_(char *, integer *), slascl_( + char *, integer *, integer *, real *, real *, integer *, integer * + , real *, integer *, integer *), slasrt_(char *, integer * + , real 
*, integer *); /* @@ -24517,7 +84457,7 @@ L100: Purpose ======= - DLASQ1 computes the singular values of a real N-by-N bidiagonal + SLASQ1 computes the singular values of a real N-by-N bidiagonal matrix with diagonal D and off-diagonal E. The singular values are computed to high relative accuracy, in the absence of denormalization, underflow and overflow. The algorithm was first @@ -24536,17 +84476,17 @@ L100: N (input) INTEGER The number of rows and columns in the matrix. N >= 0. - D (input/output) DOUBLE PRECISION array, dimension (N) + D (input/output) REAL array, dimension (N) On entry, D contains the diagonal elements of the bidiagonal matrix whose SVD is desired. On normal exit, D contains the singular values in decreasing order. - E (input/output) DOUBLE PRECISION array, dimension (N) + E (input/output) REAL array, dimension (N) On entry, elements E(1:N-1) contain the off-diagonal elements of the bidiagonal matrix whose SVD is desired. On exit, E is overwritten. - WORK (workspace) DOUBLE PRECISION array, dimension (4*N) + WORK (workspace) REAL array, dimension (4*N) INFO (output) INTEGER = 0: successful exit @@ -24572,15 +84512,15 @@ L100: if (*n < 0) { *info = -2; i__1 = -(*info); - xerbla_("DLASQ1", &i__1); + xerbla_("SLASQ1", &i__1); return 0; } else if (*n == 0) { return 0; } else if (*n == 1) { - d__[1] = abs(d__[1]); + d__[1] = dabs(d__[1]); return 0; } else if (*n == 2) { - dlas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); + slas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx); d__[1] = sigmx; d__[2] = sigmn; return 0; @@ -24588,29 +84528,29 @@ L100: /* Estimate the largest singular value. 
*/ - sigmx = 0.; + sigmx = 0.f; i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { - d__[i__] = (d__1 = d__[i__], abs(d__1)); + d__[i__] = (r__1 = d__[i__], dabs(r__1)); /* Computing MAX */ - d__2 = sigmx, d__3 = (d__1 = e[i__], abs(d__1)); - sigmx = max(d__2,d__3); + r__2 = sigmx, r__3 = (r__1 = e[i__], dabs(r__1)); + sigmx = dmax(r__2,r__3); /* L10: */ } - d__[*n] = (d__1 = d__[*n], abs(d__1)); + d__[*n] = (r__1 = d__[*n], dabs(r__1)); /* Early return if SIGMX is zero (matrix is already diagonal). */ - if (sigmx == 0.) { - dlasrt_("D", n, &d__[1], &iinfo); + if (sigmx == 0.f) { + slasrt_("D", n, &d__[1], &iinfo); return 0; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing MAX */ - d__1 = sigmx, d__2 = d__[i__]; - sigmx = max(d__1,d__2); + r__1 = sigmx, r__2 = d__[i__]; + sigmx = dmax(r__1,r__2); /* L20: */ } @@ -24619,15 +84559,15 @@ L100: input data makes scaling by a power of the radix pointless). */ - eps = PRECISION; - safmin = SAFEMINIMUM; + eps = slamch_("Precision"); + safmin = slamch_("Safe minimum"); scale = sqrt(eps / safmin); - dcopy_(n, &d__[1], &c__1, &work[1], &c__2); + scopy_(n, &d__[1], &c__1, &work[1], &c__2); i__1 = *n - 1; - dcopy_(&i__1, &e[1], &c__1, &work[2], &c__2); + scopy_(&i__1, &e[1], &c__1, &work[2], &c__2); i__1 = (*n << 1) - 1; i__2 = (*n << 1) - 1; - dlascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, + slascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2, &iinfo); /* Compute the q's and e's. 
*/ @@ -24635,13 +84575,13 @@ L100: i__1 = (*n << 1) - 1; for (i__ = 1; i__ <= i__1; ++i__) { /* Computing 2nd power */ - d__1 = work[i__]; - work[i__] = d__1 * d__1; + r__1 = work[i__]; + work[i__] = r__1 * r__1; /* L30: */ } - work[*n * 2] = 0.; + work[*n * 2] = 0.f; - dlasq2_(n, &work[1], info); + slasq2_(n, &work[1], info); if (*info == 0) { i__1 = *n; @@ -24649,52 +84589,51 @@ L100: d__[i__] = sqrt(work[i__]); /* L40: */ } - dlascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & + slascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, & iinfo); } return 0; -/* End of DLASQ1 */ +/* End of SLASQ1 */ -} /* dlasq1_ */ +} /* slasq1_ */ -/* Subroutine */ int dlasq2_(integer *n, doublereal *z__, integer *info) +/* Subroutine */ int slasq2_(integer *n, real *z__, integer *info) { /* System generated locals */ integer i__1, i__2, i__3; - doublereal d__1, d__2; + real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static doublereal d__, e; + static real d__, e; static integer k; - static doublereal s, t; + static real s, t; static integer i0, i4, n0, pp; - static doublereal eps, tol; + static real eps, tol; static integer ipn4; - static doublereal tol2; + static real tol2; static logical ieee; static integer nbig; - static doublereal dmin__, emin, emax; + static real dmin__, emin, emax; static integer ndiv, iter; - static doublereal qmin, temp, qmax, zmax; + static real qmin, temp, qmax, zmax; static integer splt, nfail; - static doublereal desig, trace, sigma; + static real desig, trace, sigma; static integer iinfo; - extern /* Subroutine */ int dlasq3_(integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, doublereal *, - integer *, integer *, integer *, logical *); - + extern /* Subroutine */ int slasq3_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, integer *, integer *, integer * + , logical *); + extern doublereal slamch_(char *); static 
integer iwhila, iwhilb; - static doublereal oldemn, safmin; + static real oldemn, safmin; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); + extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); /* @@ -24707,7 +84646,7 @@ L100: Purpose ======= - DLASQ2 computes all the eigenvalues of the symmetric positive + SLASQ2 computes all the eigenvalues of the symmetric positive definite tridiagonal matrix associated with the qd array Z to high relative accuracy are computed to high relative accuracy, in the absence of denormalization, underflow and overflow. @@ -24718,10 +84657,10 @@ L100: Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the symmetric tridiagonal to which it is similar. - Note : DLASQ2 defines a logical variable, IEEE, which is true + Note : SLASQ2 defines a logical variable, IEEE, which is true on machines which follow ieee-754 floating-point standard in their handling of infinities and NaNs, and false otherwise. This variable - is passed to DLASQ3. + is passed to SLASQ3. Arguments ========= @@ -24729,7 +84668,7 @@ L100: N (input) INTEGER The number of rows and columns in the matrix. N >= 0. - Z (workspace) DOUBLE PRECISION array, dimension ( 4*N ) + Z (workspace) REAL array, dimension ( 4*N ) On entry Z holds the qd array. On exit, entries 1 to N hold the eigenvalues in decreasing order, Z( 2*N+1 ) holds the trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If @@ -24760,7 +84699,7 @@ L100: Test the input arguments. 
- (in case DLASQ2 is not called by DLASQ1) + (in case SLASQ2 is not called by SLASQ1) */ /* Parameter adjustments */ @@ -24768,16 +84707,16 @@ L100: /* Function Body */ *info = 0; - eps = PRECISION; - safmin = SAFEMINIMUM; - tol = eps * 100.; + eps = slamch_("Precision"); + safmin = slamch_("Safe minimum"); + tol = eps * 100.f; /* Computing 2nd power */ - d__1 = tol; - tol2 = d__1 * d__1; + r__1 = tol; + tol2 = r__1 * r__1; if (*n < 0) { *info = -1; - xerbla_("DLASQ2", &c__1); + xerbla_("SLASQ2", &c__1); return 0; } else if (*n == 0) { return 0; @@ -24785,18 +84724,18 @@ L100: /* 1-by-1 case. */ - if (z__[1] < 0.) { + if (z__[1] < 0.f) { *info = -201; - xerbla_("DLASQ2", &c__2); + xerbla_("SLASQ2", &c__2); } return 0; } else if (*n == 2) { /* 2-by-2 case. */ - if (z__[2] < 0. || z__[3] < 0.) { + if (z__[2] < 0.f || z__[3] < 0.f) { *info = -2; - xerbla_("DLASQ2", &c__2); + xerbla_("SLASQ2", &c__2); return 0; } else if (z__[3] > z__[1]) { d__ = z__[3]; @@ -24805,10 +84744,10 @@ L100: } z__[5] = z__[1] + z__[2] + z__[3]; if (z__[2] > z__[3] * tol2) { - t = (z__[1] - z__[3] + z__[2]) * .5; + t = (z__[1] - z__[3] + z__[2]) * .5f; s = z__[3] * (z__[2] / t); if (s <= t) { - s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.))); + s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.f) + 1.f))); } else { s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s))); } @@ -24823,57 +84762,57 @@ L100: /* Check for negative data and compute sums of q's and e's. */ - z__[*n * 2] = 0.; + z__[*n * 2] = 0.f; emin = z__[2]; - qmax = 0.; - zmax = 0.; - d__ = 0.; - e = 0.; + qmax = 0.f; + zmax = 0.f; + d__ = 0.f; + e = 0.f; i__1 = *n - 1 << 1; for (k = 1; k <= i__1; k += 2) { - if (z__[k] < 0.) { + if (z__[k] < 0.f) { *info = -(k + 200); - xerbla_("DLASQ2", &c__2); + xerbla_("SLASQ2", &c__2); return 0; - } else if (z__[k + 1] < 0.) 
{ + } else if (z__[k + 1] < 0.f) { *info = -(k + 201); - xerbla_("DLASQ2", &c__2); + xerbla_("SLASQ2", &c__2); return 0; } d__ += z__[k]; e += z__[k + 1]; /* Computing MAX */ - d__1 = qmax, d__2 = z__[k]; - qmax = max(d__1,d__2); + r__1 = qmax, r__2 = z__[k]; + qmax = dmax(r__1,r__2); /* Computing MIN */ - d__1 = emin, d__2 = z__[k + 1]; - emin = min(d__1,d__2); + r__1 = emin, r__2 = z__[k + 1]; + emin = dmin(r__1,r__2); /* Computing MAX */ - d__1 = max(qmax,zmax), d__2 = z__[k + 1]; - zmax = max(d__1,d__2); + r__1 = max(qmax,zmax), r__2 = z__[k + 1]; + zmax = dmax(r__1,r__2); /* L10: */ } - if (z__[(*n << 1) - 1] < 0.) { + if (z__[(*n << 1) - 1] < 0.f) { *info = -((*n << 1) + 199); - xerbla_("DLASQ2", &c__2); + xerbla_("SLASQ2", &c__2); return 0; } d__ += z__[(*n << 1) - 1]; /* Computing MAX */ - d__1 = qmax, d__2 = z__[(*n << 1) - 1]; - qmax = max(d__1,d__2); - zmax = max(qmax,zmax); + r__1 = qmax, r__2 = z__[(*n << 1) - 1]; + qmax = dmax(r__1,r__2); + zmax = dmax(qmax,zmax); /* Check for diagonality. */ - if (e == 0.) { + if (e == 0.f) { i__1 = *n; for (k = 2; k <= i__1; ++k) { z__[k] = z__[(k << 1) - 1]; /* L20: */ } - dlasrt_("D", n, &z__[1], &iinfo); + slasrt_("D", n, &z__[1], &iinfo); z__[(*n << 1) - 1] = d__; return 0; } @@ -24882,23 +84821,23 @@ L100: /* Check for zero data. */ - if (trace == 0.) { - z__[(*n << 1) - 1] = 0.; + if (trace == 0.f) { + z__[(*n << 1) - 1] = 0.f; return 0; } /* Check whether the machine is IEEE conformable. */ - ieee = ilaenv_(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen) - 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "DLASQ2", "N", &c__1, &c__2, + ieee = ilaenv_(&c__10, "SLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen) + 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "SLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1; /* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). 
*/ for (k = *n << 1; k >= 2; k += -2) { - z__[k * 2] = 0.; + z__[k * 2] = 0.f; z__[(k << 1) - 1] = z__[k]; - z__[(k << 1) - 2] = 0.; + z__[(k << 1) - 2] = 0.f; z__[(k << 1) - 3] = z__[k - 1]; /* L30: */ } @@ -24908,7 +84847,7 @@ L100: /* Reverse the qd-array, if warranted. */ - if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) { + if (z__[(i0 << 2) - 3] * 1.5f < z__[(n0 << 2) - 3]) { ipn4 = i0 + n0 << 2; i__1 = i0 + n0 - 1 << 1; for (i4 = i0 << 2; i4 <= i__1; i4 += 4) { @@ -24932,7 +84871,7 @@ L100: i__1 = (i0 << 2) + pp; for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) { if (z__[i4 - 1] <= tol2 * d__) { - z__[i4 - 1] = -0.; + z__[i4 - 1] = -0.f; d__ = z__[i4 - 3]; } else { d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1])); @@ -24948,9 +84887,9 @@ L100: for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) { z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1]; if (z__[i4 - 1] <= tol2 * d__) { - z__[i4 - 1] = -0.; + z__[i4 - 1] = -0.f; z__[i4 - (pp << 1) - 2] = d__; - z__[i4 - (pp << 1)] = 0.; + z__[i4 - (pp << 1)] = 0.f; d__ = z__[i4 + 1]; } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] && safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) { @@ -24963,8 +84902,8 @@ L100: d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]); } /* Computing MIN */ - d__1 = emin, d__2 = z__[i4 - (pp << 1)]; - emin = min(d__1,d__2); + r__1 = emin, r__2 = z__[i4 - (pp << 1)]; + emin = dmin(r__1,r__2); /* L60: */ } z__[(n0 << 2) - pp - 2] = d__; @@ -24975,8 +84914,8 @@ L100: i__1 = (n0 << 2) - pp - 2; for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) { /* Computing MAX */ - d__1 = qmax, d__2 = z__[i4]; - qmax = max(d__1,d__2); + r__1 = qmax, r__2 = z__[i4]; + qmax = dmax(r__1,r__2); /* L70: */ } @@ -25003,13 +84942,13 @@ L100: splits from the rest of the array, but is negated. */ - desig = 0.; + desig = 0.f; if (n0 == *n) { - sigma = 0.; + sigma = 0.f; } else { sigma = -z__[(n0 << 2) - 1]; } - if (sigma < 0.) { + if (sigma < 0.f) { *info = 1; return 0; } @@ -25019,32 +84958,32 @@ L100: EMIN. 
Find Gershgorin-type bound if Q's much greater than E's. */ - emax = 0.; + emax = 0.f; if (n0 > i0) { - emin = (d__1 = z__[(n0 << 2) - 5], abs(d__1)); + emin = (r__1 = z__[(n0 << 2) - 5], dabs(r__1)); } else { - emin = 0.; + emin = 0.f; } qmin = z__[(n0 << 2) - 3]; qmax = qmin; for (i4 = n0 << 2; i4 >= 8; i4 += -4) { - if (z__[i4 - 5] <= 0.) { + if (z__[i4 - 5] <= 0.f) { goto L100; } - if (qmin >= emax * 4.) { + if (qmin >= emax * 4.f) { /* Computing MIN */ - d__1 = qmin, d__2 = z__[i4 - 3]; - qmin = min(d__1,d__2); + r__1 = qmin, r__2 = z__[i4 - 3]; + qmin = dmin(r__1,r__2); /* Computing MAX */ - d__1 = emax, d__2 = z__[i4 - 5]; - emax = max(d__1,d__2); + r__1 = emax, r__2 = z__[i4 - 5]; + emax = dmax(r__1,r__2); } /* Computing MAX */ - d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5]; - qmax = max(d__1,d__2); + r__1 = qmax, r__2 = z__[i4 - 7] + z__[i4 - 5]; + qmax = dmax(r__1,r__2); /* Computing MIN */ - d__1 = emin, d__2 = z__[i4 - 5]; - emin = min(d__1,d__2); + r__1 = emin, r__2 = z__[i4 - 5]; + emin = dmin(r__1,r__2); /* L90: */ } i4 = 4; @@ -25052,7 +84991,7 @@ L100: L100: i0 = i4 / 4; -/* Store EMIN for passing to DLASQ3. */ +/* Store EMIN for passing to SLASQ3. */ z__[(n0 << 2) - 1] = emin; @@ -25061,8 +85000,8 @@ L100: Computing MAX */ - d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax); - dmin__ = -max(d__1,d__2); + r__1 = 0.f, r__2 = qmin - sqrt(qmin) * 2.f * sqrt(emax); + dmin__ = -dmax(r__1,r__2); /* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. */ @@ -25077,7 +85016,7 @@ L100: /* While submatrix unfinished take a good dqds step. 
*/ - dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & + slasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & nfail, &iter, &ndiv, &ieee); pp = 1 - pp; @@ -25097,19 +85036,19 @@ L100: tol2 * sigma) { z__[i4 - 1] = -sigma; splt = i4 / 4; - qmax = 0.; + qmax = 0.f; emin = z__[i4 + 3]; oldemn = z__[i4 + 4]; } else { /* Computing MAX */ - d__1 = qmax, d__2 = z__[i4 + 1]; - qmax = max(d__1,d__2); + r__1 = qmax, r__2 = z__[i4 + 1]; + qmax = dmax(r__1,r__2); /* Computing MIN */ - d__1 = emin, d__2 = z__[i4 - 1]; - emin = min(d__1,d__2); + r__1 = emin, r__2 = z__[i4 - 1]; + emin = dmin(r__1,r__2); /* Computing MIN */ - d__1 = oldemn, d__2 = z__[i4]; - oldemn = min(d__1,d__2); + r__1 = oldemn, r__2 = z__[i4]; + oldemn = dmin(r__1,r__2); } /* L110: */ } @@ -25150,9 +85089,9 @@ L150: /* Sort and compute sum of eigenvalues. */ - dlasrt_("D", n, &z__[1], &iinfo); + slasrt_("D", n, &z__[1], &iinfo); - e = 0.; + e = 0.f; for (k = *n; k >= 1; --k) { e += z__[k]; /* L170: */ @@ -25162,56 +85101,52 @@ L150: z__[(*n << 1) + 1] = trace; z__[(*n << 1) + 2] = e; - z__[(*n << 1) + 3] = (doublereal) iter; + z__[(*n << 1) + 3] = (real) iter; /* Computing 2nd power */ i__1 = *n; - z__[(*n << 1) + 4] = (doublereal) ndiv / (doublereal) (i__1 * i__1); - z__[(*n << 1) + 5] = nfail * 100. 
/ (doublereal) iter; + z__[(*n << 1) + 4] = (real) ndiv / (real) (i__1 * i__1); + z__[(*n << 1) + 5] = nfail * 100.f / (real) iter; return 0; -/* End of DLASQ2 */ +/* End of SLASQ2 */ -} /* dlasq2_ */ +} /* slasq2_ */ -/* Subroutine */ int dlasq3_(integer *i0, integer *n0, doublereal *z__, - integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, - doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, - logical *ieee) +/* Subroutine */ int slasq3_(integer *i0, integer *n0, real *z__, integer *pp, + real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail, + integer *iter, integer *ndiv, logical *ieee) { /* Initialized data */ static integer ttype = 0; - static doublereal dmin1 = 0.; - static doublereal dmin2 = 0.; - static doublereal dn = 0.; - static doublereal dn1 = 0.; - static doublereal dn2 = 0.; - static doublereal tau = 0.; + static real dmin1 = 0.f; + static real dmin2 = 0.f; + static real dn = 0.f; + static real dn1 = 0.f; + static real dn2 = 0.f; + static real tau = 0.f; /* System generated locals */ integer i__1; - doublereal d__1, d__2; + real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static doublereal s, t; + static real s, t; static integer j4, nn; - static doublereal eps, tol; + static real eps, tol; static integer n0in, ipn4; - static doublereal tol2, temp; - extern /* Subroutine */ int dlasq4_(integer *, integer *, doublereal *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *) - , dlasq5_(integer *, integer *, doublereal *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, logical *), dlasq6_( - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *); - - static doublereal safmin; + static real tol2, temp; + extern /* Subroutine */ int 
slasq4_(integer *, integer *, real *, integer + *, integer *, real *, real *, real *, real *, real *, real *, + real *, integer *), slasq5_(integer *, integer *, real *, integer + *, real *, real *, real *, real *, real *, real *, real *, + logical *), slasq6_(integer *, integer *, real *, integer *, real + *, real *, real *, real *, real *, real *); + extern doublereal slamch_(char *); + static real safmin; /* @@ -25224,7 +85159,7 @@ L150: Purpose ======= - DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. + SLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. In case of failure it changes shifts, and tries again until output is positive. @@ -25237,22 +85172,22 @@ L150: N0 (input) INTEGER Last index. - Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z (input) REAL array, dimension ( 4*N ) Z holds the qd array. PP (input) INTEGER PP=0 for ping, PP=1 for pong. - DMIN (output) DOUBLE PRECISION + DMIN (output) REAL Minimum value of d. - SIGMA (output) DOUBLE PRECISION + SIGMA (output) REAL Sum of shifts used in current segment. - DESIG (input/output) DOUBLE PRECISION + DESIG (input/output) REAL Lower order part of SIGMA - QMAX (input) DOUBLE PRECISION + QMAX (input) REAL Maximum value of q. NFAIL (output) INTEGER @@ -25268,7 +85203,7 @@ L150: Shift type. IEEE (input) LOGICAL - Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). + Flag for IEEE or non IEEE arithmetic (passed to SLASQ5). ===================================================================== */ @@ -25279,12 +85214,12 @@ L150: /* Function Body */ n0in = *n0; - eps = PRECISION; - safmin = SAFEMINIMUM; - tol = eps * 100.; + eps = slamch_("Precision"); + safmin = slamch_("Safe minimum"); + tol = eps * 100.f; /* Computing 2nd power */ - d__1 = tol; - tol2 = d__1 * d__1; + r__1 = tol; + tol2 = r__1 * r__1; /* Check for deflation. 
*/ @@ -25331,10 +85266,10 @@ L40: z__[nn - 7] = s; } if (z__[nn - 5] > z__[nn - 3] * tol2) { - t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5; + t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5f; s = z__[nn - 3] * (z__[nn - 5] / t); if (s <= t) { - s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) + 1.))); + s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.f) + 1.f))); } else { s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s))); } @@ -25351,8 +85286,8 @@ L50: /* Reverse the qd-array, if warranted. */ - if (*dmin__ <= 0. || *n0 < n0in) { - if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) { + if (*dmin__ <= 0.f || *n0 < n0in) { + if (z__[(*i0 << 2) + *pp - 3] * 1.5f < z__[(*n0 << 2) + *pp - 3]) { ipn4 = *i0 + *n0 << 2; i__1 = *i0 + *n0 - 1 << 1; for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { @@ -25375,21 +85310,21 @@ L50: z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; } /* Computing MIN */ - d__1 = dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; - dmin2 = min(d__1,d__2); + r__1 = dmin2, r__2 = z__[(*n0 << 2) + *pp - 1]; + dmin2 = dmin(r__1,r__2); /* Computing MIN */ - d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1] - , d__1 = min(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3]; - z__[(*n0 << 2) + *pp - 1] = min(d__1,d__2); + r__1 = z__[(*n0 << 2) + *pp - 1], r__2 = z__[(*i0 << 2) + *pp - 1] + , r__1 = min(r__1,r__2), r__2 = z__[(*i0 << 2) + *pp + 3]; + z__[(*n0 << 2) + *pp - 1] = dmin(r__1,r__2); /* Computing MIN */ - d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 = - min(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4]; - z__[(*n0 << 2) - *pp] = min(d__1,d__2); + r__1 = z__[(*n0 << 2) - *pp], r__2 = z__[(*i0 << 2) - *pp], r__1 = + min(r__1,r__2), r__2 = z__[(*i0 << 2) - *pp + 4]; + z__[(*n0 << 2) - *pp] = dmin(r__1,r__2); /* Computing MAX */ - d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = max(d__1, - d__2), d__2 = z__[(*i0 << 2) + *pp + 1]; - *qmax = max(d__1,d__2); - *dmin__ = -0.; + r__1 = 
*qmax, r__2 = z__[(*i0 << 2) + *pp - 3], r__1 = max(r__1, + r__2), r__2 = z__[(*i0 << 2) + *pp + 1]; + *qmax = dmax(r__1,r__2); + *dmin__ = -0.f; } } @@ -25398,20 +85333,20 @@ L50: Computing MIN */ - d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*n0 << 2) + *pp - 9], d__1 = - min(d__1,d__2), d__2 = dmin2 + z__[(*n0 << 2) - *pp]; - if (*dmin__ < 0. || safmin * *qmax < min(d__1,d__2)) { + r__1 = z__[(*n0 << 2) + *pp - 1], r__2 = z__[(*n0 << 2) + *pp - 9], r__1 = + min(r__1,r__2), r__2 = dmin2 + z__[(*n0 << 2) - *pp]; + if (*dmin__ < 0.f || safmin * *qmax < dmin(r__1,r__2)) { /* Choose a shift. */ - dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1, + slasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2, &tau, &ttype); /* Call dqds until DMIN > 0. */ L80: - dlasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1, + slasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2, ieee); *ndiv += *n0 - *i0 + 2; @@ -25419,21 +85354,21 @@ L80: /* Check status. */ - if (*dmin__ >= 0. && dmin1 > 0.) { + if (*dmin__ >= 0.f && dmin1 > 0.f) { /* Success. */ goto L100; - } else if (*dmin__ < 0. && dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] < - tol * (*sigma + dn1) && abs(dn) < tol * *sigma) { + } else if (*dmin__ < 0.f && dmin1 > 0.f && z__[(*n0 - 1 << 2) - *pp] < + tol * (*sigma + dn1) && dabs(dn) < tol * *sigma) { /* Convergence hidden by negative DN. */ - z__[(*n0 - 1 << 2) - *pp + 2] = 0.; - *dmin__ = 0.; + z__[(*n0 - 1 << 2) - *pp + 2] = 0.f; + *dmin__ = 0.f; goto L100; - } else if (*dmin__ < 0.) { + } else if (*dmin__ < 0.f) { /* TAU too big. Select new TAU and try again. */ @@ -25442,18 +85377,18 @@ L80: /* Failed twice. Play it safe. */ - tau = 0.; - } else if (dmin1 > 0.) { + tau = 0.f; + } else if (dmin1 > 0.f) { /* Late failure. Gives excellent shift. */ - tau = (tau + *dmin__) * (1. - eps * 2.); + tau = (tau + *dmin__) * (1.f - eps * 2.f); ttype += -11; } else { /* Early failure. Divide by 4. 
*/ - tau *= .25; + tau *= .25f; ttype += -12; } goto L80; @@ -25461,7 +85396,7 @@ L80: /* NaN. */ - tau = 0.; + tau = 0.f; goto L80; } else { @@ -25474,10 +85409,10 @@ L80: /* Risk of underflow. */ L90: - dlasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2); + slasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2); *ndiv += *n0 - *i0 + 2; ++(*iter); - tau = 0.; + tau = 0.f; L100: if (tau < *sigma) { @@ -25492,30 +85427,29 @@ L100: return 0; -/* End of DLASQ3 */ +/* End of SLASQ3 */ -} /* dlasq3_ */ +} /* slasq3_ */ -/* Subroutine */ int dlasq4_(integer *i0, integer *n0, doublereal *z__, - integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, - doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, - doublereal *tau, integer *ttype) +/* Subroutine */ int slasq4_(integer *i0, integer *n0, real *z__, integer *pp, + integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn, + real *dn1, real *dn2, real *tau, integer *ttype) { /* Initialized data */ - static doublereal g = 0.; + static real g = 0.f; /* System generated locals */ integer i__1; - doublereal d__1, d__2; + real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static doublereal s, a2, b1, b2; + static real s, a2, b1, b2; static integer i4, nn, np; - static doublereal gam, gap1, gap2; + static real gam, gap1, gap2; /* @@ -25528,7 +85462,7 @@ L100: Purpose ======= - DLASQ4 computes an approximation TAU to the smallest eigenvalue + SLASQ4 computes an approximation TAU to the smallest eigenvalue using values of d from the previous transform. I0 (input) INTEGER @@ -25537,7 +85471,7 @@ L100: N0 (input) INTEGER Last index. - Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z (input) REAL array, dimension ( 4*N ) Z holds the qd array. PP (input) INTEGER @@ -25546,25 +85480,25 @@ L100: NOIN (input) INTEGER The value of N0 at start of EIGTEST. - DMIN (input) DOUBLE PRECISION + DMIN (input) REAL Minimum value of d. 
- DMIN1 (input) DOUBLE PRECISION + DMIN1 (input) REAL Minimum value of d, excluding D( N0 ). - DMIN2 (input) DOUBLE PRECISION + DMIN2 (input) REAL Minimum value of d, excluding D( N0 ) and D( N0-1 ). - DN (input) DOUBLE PRECISION + DN (input) REAL d(N) - DN1 (input) DOUBLE PRECISION + DN1 (input) REAL d(N-1) - DN2 (input) DOUBLE PRECISION + DN2 (input) REAL d(N-2) - TAU (output) DOUBLE PRECISION + TAU (output) REAL This is the shift. TTYPE (output) INTEGER @@ -25587,7 +85521,7 @@ L100: TTYPE records the type of shift. */ - if (*dmin__ <= 0.) { + if (*dmin__ <= 0.f) { *tau = -(*dmin__); *ttype = -1; return 0; @@ -25607,30 +85541,30 @@ L100: /* Cases 2 and 3. */ if (*dmin__ == *dn && *dmin1 == *dn1) { - gap2 = *dmin2 - a2 - *dmin2 * .25; - if (gap2 > 0. && gap2 > b2) { + gap2 = *dmin2 - a2 - *dmin2 * .25f; + if (gap2 > 0.f && gap2 > b2) { gap1 = a2 - *dn - b2 / gap2 * b2; } else { gap1 = a2 - *dn - (b1 + b2); } - if (gap1 > 0. && gap1 > b1) { + if (gap1 > 0.f && gap1 > b1) { /* Computing MAX */ - d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5; - s = max(d__1,d__2); + r__1 = *dn - b1 / gap1 * b1, r__2 = *dmin__ * .5f; + s = dmax(r__1,r__2); *ttype = -2; } else { - s = 0.; + s = 0.f; if (*dn > b1) { s = *dn - b1; } if (a2 > b1 + b2) { /* Computing MIN */ - d__1 = s, d__2 = a2 - (b1 + b2); - s = min(d__1,d__2); + r__1 = s, r__2 = a2 - (b1 + b2); + s = dmin(r__1,r__2); } /* Computing MAX */ - d__1 = s, d__2 = *dmin__ * .333; - s = max(d__1,d__2); + r__1 = s, r__2 = *dmin__ * .333f; + s = dmax(r__1,r__2); *ttype = -3; } } else { @@ -25638,10 +85572,10 @@ L100: /* Case 4. */ *ttype = -4; - s = *dmin__ * .25; + s = *dmin__ * .25f; if (*dmin__ == *dn) { gam = *dn; - a2 = 0.; + a2 = 0.f; if (z__[nn - 5] > z__[nn - 7]) { return 0; } @@ -25667,7 +85601,7 @@ L100: a2 += b2; i__1 = (*i0 << 2) - 1 + *pp; for (i4 = np; i4 >= i__1; i4 += -4) { - if (b2 == 0.) 
{ + if (b2 == 0.f) { goto L20; } b1 = b2; @@ -25676,18 +85610,18 @@ L100: } b2 *= z__[i4] / z__[i4 - 2]; a2 += b2; - if (max(b2,b1) * 100. < a2 || .563 < a2) { + if (dmax(b2,b1) * 100.f < a2 || .563f < a2) { goto L20; } /* L10: */ } L20: - a2 *= 1.05; + a2 *= 1.05f; /* Rayleigh quotient residual bound. */ - if (a2 < .563) { - s = gam * (1. - sqrt(a2)) / (a2 + 1.); + if (a2 < .563f) { + s = gam * (1.f - sqrt(a2)) / (a2 + 1.f); } } } else if (*dmin__ == *dn2) { @@ -25695,7 +85629,7 @@ L20: /* Case 5. */ *ttype = -5; - s = *dmin__ * .25; + s = *dmin__ * .25f; /* Compute contribution to norm squared from I > NN-2. */ @@ -25706,7 +85640,7 @@ L20: if (z__[np - 8] > b2 || z__[np - 4] > b1) { return 0; } - a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.); + a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.f); /* Approximate contribution to norm squared from I < NN-2. */ @@ -25715,7 +85649,7 @@ L20: a2 += b2; i__1 = (*i0 << 2) - 1 + *pp; for (i4 = nn - 17; i4 >= i__1; i4 += -4) { - if (b2 == 0.) { + if (b2 == 0.f) { goto L40; } b1 = b2; @@ -25724,28 +85658,28 @@ L20: } b2 *= z__[i4] / z__[i4 - 2]; a2 += b2; - if (max(b2,b1) * 100. < a2 || .563 < a2) { + if (dmax(b2,b1) * 100.f < a2 || .563f < a2) { goto L40; } /* L30: */ } L40: - a2 *= 1.05; + a2 *= 1.05f; } - if (a2 < .563) { - s = gam * (1. - sqrt(a2)) / (a2 + 1.); + if (a2 < .563f) { + s = gam * (1.f - sqrt(a2)) / (a2 + 1.f); } } else { /* Case 6, no information to guide us. */ if (*ttype == -6) { - g += (1. - g) * .333; + g += (1.f - g) * .333f; } else if (*ttype == -18) { - g = .083250000000000005; + g = .083250000000000005f; } else { - g = .25; + g = .25f; } s = g * *dmin__; *ttype = -6; @@ -25760,13 +85694,13 @@ L40: /* Cases 7 and 8. */ *ttype = -7; - s = *dmin1 * .333; + s = *dmin1 * .333f; if (z__[nn - 5] > z__[nn - 7]) { return 0; } b1 = z__[nn - 5] / z__[nn - 7]; b2 = b1; - if (b2 == 0.) 
{ + if (b2 == 0.f) { goto L60; } i__1 = (*i0 << 2) - 1 + *pp; @@ -25777,34 +85711,34 @@ L40: } b1 *= z__[i4] / z__[i4 - 2]; b2 += b1; - if (max(b1,a2) * 100. < b2) { + if (dmax(b1,a2) * 100.f < b2) { goto L60; } /* L50: */ } L60: - b2 = sqrt(b2 * 1.05); + b2 = sqrt(b2 * 1.05f); /* Computing 2nd power */ - d__1 = b2; - a2 = *dmin1 / (d__1 * d__1 + 1.); - gap2 = *dmin2 * .5 - a2; - if (gap2 > 0. && gap2 > b2 * a2) { + r__1 = b2; + a2 = *dmin1 / (r__1 * r__1 + 1.f); + gap2 = *dmin2 * .5f - a2; + if (gap2 > 0.f && gap2 > b2 * a2) { /* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2); - s = max(d__1,d__2); + r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2); + s = dmax(r__1,r__2); } else { /* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - b2 * 1.01); - s = max(d__1,d__2); + r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f); + s = dmax(r__1,r__2); *ttype = -8; } } else { /* Case 9. */ - s = *dmin1 * .25; + s = *dmin1 * .25f; if (*dmin1 == *dn1) { - s = *dmin1 * .5; + s = *dmin1 * .5f; } *ttype = -9; } @@ -25817,15 +85751,15 @@ L60: Cases 10 and 11. */ - if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) { + if (*dmin2 == *dn2 && z__[nn - 5] * 2.f < z__[nn - 7]) { *ttype = -10; - s = *dmin2 * .333; + s = *dmin2 * .333f; if (z__[nn - 5] > z__[nn - 7]) { return 0; } b1 = z__[nn - 5] / z__[nn - 7]; b2 = b1; - if (b2 == 0.) { + if (b2 == 0.f) { goto L80; } i__1 = (*i0 << 2) - 1 + *pp; @@ -25835,59 +85769,58 @@ L60: } b1 *= z__[i4] / z__[i4 - 2]; b2 += b1; - if (b1 * 100. < b2) { + if (b1 * 100.f < b2) { goto L80; } /* L70: */ } L80: - b2 = sqrt(b2 * 1.05); + b2 = sqrt(b2 * 1.05f); /* Computing 2nd power */ - d__1 = b2; - a2 = *dmin2 / (d__1 * d__1 + 1.); + r__1 = b2; + a2 = *dmin2 / (r__1 * r__1 + 1.f); gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[ nn - 9]) - a2; - if (gap2 > 0. && gap2 > b2 * a2) { + if (gap2 > 0.f && gap2 > b2 * a2) { /* Computing MAX */ - d__1 = s, d__2 = a2 * (1. 
- a2 * 1.01 * (b2 / gap2) * b2); - s = max(d__1,d__2); + r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2); + s = dmax(r__1,r__2); } else { /* Computing MAX */ - d__1 = s, d__2 = a2 * (1. - b2 * 1.01); - s = max(d__1,d__2); + r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f); + s = dmax(r__1,r__2); } } else { - s = *dmin2 * .25; + s = *dmin2 * .25f; *ttype = -11; } } else if (*n0in > *n0 + 2) { /* Case 12, more than two eigenvalues deflated. No information. */ - s = 0.; + s = 0.f; *ttype = -12; } *tau = s; return 0; -/* End of DLASQ4 */ +/* End of SLASQ4 */ -} /* dlasq4_ */ +} /* slasq4_ */ -/* Subroutine */ int dlasq5_(integer *i0, integer *n0, doublereal *z__, - integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1, - doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2, - logical *ieee) +/* Subroutine */ int slasq5_(integer *i0, integer *n0, real *z__, integer *pp, + real *tau, real *dmin__, real *dmin1, real *dmin2, real *dn, real * + dnm1, real *dnm2, logical *ieee) { /* System generated locals */ integer i__1; - doublereal d__1, d__2; + real r__1, r__2; /* Local variables */ - static doublereal d__; + static real d__; static integer j4, j4p2; - static doublereal emin, temp; + static real emin, temp; /* @@ -25900,7 +85833,7 @@ L80: Purpose ======= - DLASQ5 computes one dqds transform in ping-pong form, one + SLASQ5 computes one dqds transform in ping-pong form, one version for IEEE machines another for non IEEE machines. Arguments @@ -25912,32 +85845,32 @@ L80: N0 (input) INTEGER Last index. - Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z (input) REAL array, dimension ( 4*N ) Z holds the qd array. EMIN is stored in Z(4*N0) to avoid an extra argument. PP (input) INTEGER PP=0 for ping, PP=1 for pong. - TAU (input) DOUBLE PRECISION + TAU (input) REAL This is the shift. - DMIN (output) DOUBLE PRECISION + DMIN (output) REAL Minimum value of d. 
- DMIN1 (output) DOUBLE PRECISION + DMIN1 (output) REAL Minimum value of d, excluding D( N0 ). - DMIN2 (output) DOUBLE PRECISION + DMIN2 (output) REAL Minimum value of d, excluding D( N0 ) and D( N0-1 ). - DN (output) DOUBLE PRECISION + DN (output) REAL d(N0), the last value of d. - DNM1 (output) DOUBLE PRECISION + DNM1 (output) REAL d(N0-1). - DNM2 (output) DOUBLE PRECISION + DNM2 (output) REAL d(N0-2). IEEE (input) LOGICAL @@ -25971,11 +85904,11 @@ L80: z__[j4 - 2] = d__ + z__[j4 - 1]; temp = z__[j4 + 1] / z__[j4 - 2]; d__ = d__ * temp - *tau; - *dmin__ = min(*dmin__,d__); + *dmin__ = dmin(*dmin__,d__); z__[j4] = z__[j4 - 1] * temp; /* Computing MIN */ - d__1 = z__[j4]; - emin = min(d__1,emin); + r__1 = z__[j4]; + emin = dmin(r__1,emin); /* L10: */ } } else { @@ -25984,11 +85917,11 @@ L80: z__[j4 - 3] = d__ + z__[j4]; temp = z__[j4 + 2] / z__[j4 - 3]; d__ = d__ * temp - *tau; - *dmin__ = min(*dmin__,d__); + *dmin__ = dmin(*dmin__,d__); z__[j4 - 1] = z__[j4] * temp; /* Computing MIN */ - d__1 = z__[j4 - 1]; - emin = min(d__1,emin); + r__1 = z__[j4 - 1]; + emin = dmin(r__1,emin); /* L20: */ } } @@ -26002,7 +85935,7 @@ L80: z__[j4 - 2] = *dnm2 + z__[j4p2]; z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; - *dmin__ = min(*dmin__,*dnm1); + *dmin__ = dmin(*dmin__,*dnm1); *dmin1 = *dmin__; j4 += 4; @@ -26010,7 +85943,7 @@ L80: z__[j4 - 2] = *dnm1 + z__[j4p2]; z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; - *dmin__ = min(*dmin__,*dn); + *dmin__ = dmin(*dmin__,*dn); } else { @@ -26020,32 +85953,32 @@ L80: i__1 = *n0 - 3 << 2; for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 2] = d__ + z__[j4 - 1]; - if (d__ < 0.) 
{ + if (d__ < 0.f) { return 0; } else { z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; } - *dmin__ = min(*dmin__,d__); + *dmin__ = dmin(*dmin__,d__); /* Computing MIN */ - d__1 = emin, d__2 = z__[j4]; - emin = min(d__1,d__2); + r__1 = emin, r__2 = z__[j4]; + emin = dmin(r__1,r__2); /* L30: */ } } else { i__1 = *n0 - 3 << 2; for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 3] = d__ + z__[j4]; - if (d__ < 0.) { + if (d__ < 0.f) { return 0; } else { z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; } - *dmin__ = min(*dmin__,d__); + *dmin__ = dmin(*dmin__,d__); /* Computing MIN */ - d__1 = emin, d__2 = z__[j4 - 1]; - emin = min(d__1,d__2); + r__1 = emin, r__2 = z__[j4 - 1]; + emin = dmin(r__1,r__2); /* L40: */ } } @@ -26057,25 +85990,25 @@ L80: j4 = (*n0 - 2 << 2) - *pp; j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm2 + z__[j4p2]; - if (*dnm2 < 0.) { + if (*dnm2 < 0.f) { return 0; } else { z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; } - *dmin__ = min(*dmin__,*dnm1); + *dmin__ = dmin(*dmin__,*dnm1); *dmin1 = *dmin__; j4 += 4; j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm1 + z__[j4p2]; - if (*dnm1 < 0.) 
{ + if (*dnm1 < 0.f) { return 0; } else { z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; } - *dmin__ = min(*dmin__,*dn); + *dmin__ = dmin(*dmin__,*dn); } @@ -26083,24 +86016,24 @@ L80: z__[(*n0 << 2) - *pp] = emin; return 0; -/* End of DLASQ5 */ +/* End of SLASQ5 */ -} /* dlasq5_ */ +} /* slasq5_ */ -/* Subroutine */ int dlasq6_(integer *i0, integer *n0, doublereal *z__, - integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, - doublereal *dn, doublereal *dnm1, doublereal *dnm2) +/* Subroutine */ int slasq6_(integer *i0, integer *n0, real *z__, integer *pp, + real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real * + dnm2) { /* System generated locals */ integer i__1; - doublereal d__1, d__2; + real r__1, r__2; /* Local variables */ - static doublereal d__; + static real d__; static integer j4, j4p2; - static doublereal emin, temp; - - static doublereal safmin; + static real emin, temp; + extern doublereal slamch_(char *); + static real safmin; /* @@ -26113,7 +86046,7 @@ L80: Purpose ======= - DLASQ6 computes one dqd (shift equal to zero) transform in + SLASQ6 computes one dqd (shift equal to zero) transform in ping-pong form, with protection against underflow and overflow. Arguments @@ -26125,29 +86058,29 @@ L80: N0 (input) INTEGER Last index. - Z (input) DOUBLE PRECISION array, dimension ( 4*N ) + Z (input) REAL array, dimension ( 4*N ) Z holds the qd array. EMIN is stored in Z(4*N0) to avoid an extra argument. PP (input) INTEGER PP=0 for ping, PP=1 for pong. - DMIN (output) DOUBLE PRECISION + DMIN (output) REAL Minimum value of d. - DMIN1 (output) DOUBLE PRECISION + DMIN1 (output) REAL Minimum value of d, excluding D( N0 ). - DMIN2 (output) DOUBLE PRECISION + DMIN2 (output) REAL Minimum value of d, excluding D( N0 ) and D( N0-1 ). - DN (output) DOUBLE PRECISION + DN (output) REAL d(N0), the last value of d. - DNM1 (output) DOUBLE PRECISION + DNM1 (output) REAL d(N0-1). 
- DNM2 (output) DOUBLE PRECISION + DNM2 (output) REAL d(N0-2). ===================================================================== @@ -26162,7 +86095,7 @@ L80: return 0; } - safmin = SAFEMINIMUM; + safmin = slamch_("Safe minimum"); j4 = (*i0 << 2) + *pp - 3; emin = z__[j4 + 4]; d__ = z__[j4]; @@ -26172,11 +86105,11 @@ L80: i__1 = *n0 - 3 << 2; for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 2] = d__ + z__[j4 - 1]; - if (z__[j4 - 2] == 0.) { - z__[j4] = 0.; + if (z__[j4 - 2] == 0.f) { + z__[j4] = 0.f; d__ = z__[j4 + 1]; *dmin__ = d__; - emin = 0.; + emin = 0.f; } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4 - 2] < z__[j4 + 1]) { temp = z__[j4 + 1] / z__[j4 - 2]; @@ -26186,21 +86119,21 @@ L80: z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]); } - *dmin__ = min(*dmin__,d__); + *dmin__ = dmin(*dmin__,d__); /* Computing MIN */ - d__1 = emin, d__2 = z__[j4]; - emin = min(d__1,d__2); + r__1 = emin, r__2 = z__[j4]; + emin = dmin(r__1,r__2); /* L10: */ } } else { i__1 = *n0 - 3 << 2; for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { z__[j4 - 3] = d__ + z__[j4]; - if (z__[j4 - 3] == 0.) { - z__[j4 - 1] = 0.; + if (z__[j4 - 3] == 0.f) { + z__[j4 - 1] = 0.f; d__ = z__[j4 + 2]; *dmin__ = d__; - emin = 0.; + emin = 0.f; } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4 - 3] < z__[j4 + 2]) { temp = z__[j4 + 2] / z__[j4 - 3]; @@ -26210,10 +86143,10 @@ L80: z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]); } - *dmin__ = min(*dmin__,d__); + *dmin__ = dmin(*dmin__,d__); /* Computing MIN */ - d__1 = emin, d__2 = z__[j4 - 1]; - emin = min(d__1,d__2); + r__1 = emin, r__2 = z__[j4 - 1]; + emin = dmin(r__1,r__2); /* L20: */ } } @@ -26225,11 +86158,11 @@ L80: j4 = (*n0 - 2 << 2) - *pp; j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm2 + z__[j4p2]; - if (z__[j4 - 2] == 0.) 
{ - z__[j4] = 0.; + if (z__[j4 - 2] == 0.f) { + z__[j4] = 0.f; *dnm1 = z__[j4p2 + 2]; *dmin__ = *dnm1; - emin = 0.; + emin = 0.f; } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < z__[j4p2 + 2]) { temp = z__[j4p2 + 2] / z__[j4 - 2]; @@ -26239,17 +86172,17 @@ L80: z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]); } - *dmin__ = min(*dmin__,*dnm1); + *dmin__ = dmin(*dmin__,*dnm1); *dmin1 = *dmin__; j4 += 4; j4p2 = j4 + (*pp << 1) - 1; z__[j4 - 2] = *dnm1 + z__[j4p2]; - if (z__[j4 - 2] == 0.) { - z__[j4] = 0.; + if (z__[j4 - 2] == 0.f) { + z__[j4] = 0.f; *dn = z__[j4p2 + 2]; *dmin__ = *dn; - emin = 0.; + emin = 0.f; } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] < z__[j4p2 + 2]) { temp = z__[j4p2 + 2] / z__[j4 - 2]; @@ -26259,28 +86192,27 @@ L80: z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]); } - *dmin__ = min(*dmin__,*dn); + *dmin__ = dmin(*dmin__,*dn); z__[j4 + 2] = *dn; z__[(*n0 << 2) - *pp] = emin; return 0; -/* End of DLASQ6 */ +/* End of SLASQ6 */ -} /* dlasq6_ */ +} /* slasq6_ */ -/* Subroutine */ int dlasr_(char *side, char *pivot, char *direct, integer *m, - integer *n, doublereal *c__, doublereal *s, doublereal *a, integer * - lda) +/* Subroutine */ int slasr_(char *side, char *pivot, char *direct, integer *m, + integer *n, real *c__, real *s, real *a, integer *lda) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Local variables */ static integer i__, j, info; - static doublereal temp; + static real temp; extern logical lsame_(char *, char *); - static doublereal ctemp, stemp; + static real ctemp, stemp; extern /* Subroutine */ int xerbla_(char *, integer *); @@ -26294,7 +86226,7 @@ L80: Purpose ======= - DLASR performs the transformation + SLASR performs the transformation A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) @@ -26363,7 +86295,7 @@ L80: The number of columns of the 
matrix A. If n <= 1, an immediate return is effected. - C, S (input) DOUBLE PRECISION arrays, dimension + C, S (input) REAL arrays, dimension (M-1) if SIDE = 'L' (N-1) if SIDE = 'R' c(k) and s(k) contain the cosine and sine that define the @@ -26372,7 +86304,7 @@ L80: R( k ) = ( c( k ) s( k ) ). ( -s( k ) c( k ) ) - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) The m by n matrix A. On exit, A is overwritten by P*A if SIDE = 'R' or by A*P' if SIDE = 'L'. @@ -26410,7 +86342,7 @@ L80: info = 9; } if (info != 0) { - xerbla_("DLASR ", &info); + xerbla_("SLASR ", &info); return 0; } @@ -26429,7 +86361,7 @@ L80: for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { + if (ctemp != 1.f || stemp != 0.f) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[j + 1 + i__ * a_dim1]; @@ -26446,7 +86378,7 @@ L80: for (j = *m - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { + if (ctemp != 1.f || stemp != 0.f) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[j + 1 + i__ * a_dim1]; @@ -26466,7 +86398,7 @@ L80: for (j = 2; j <= i__1; ++j) { ctemp = c__[j - 1]; stemp = s[j - 1]; - if (ctemp != 1. || stemp != 0.) { + if (ctemp != 1.f || stemp != 0.f) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[j + i__ * a_dim1]; @@ -26483,7 +86415,7 @@ L80: for (j = *m; j >= 2; --j) { ctemp = c__[j - 1]; stemp = s[j - 1]; - if (ctemp != 1. || stemp != 0.) { + if (ctemp != 1.f || stemp != 0.f) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[j + i__ * a_dim1]; @@ -26503,7 +86435,7 @@ L80: for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { + if (ctemp != 1.f || stemp != 0.f) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[j + i__ * a_dim1]; @@ -26520,7 +86452,7 @@ L80: for (j = *m - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; - if (ctemp != 1. || stemp != 0.) 
{ + if (ctemp != 1.f || stemp != 0.f) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[j + i__ * a_dim1]; @@ -26545,7 +86477,7 @@ L80: for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { + if (ctemp != 1.f || stemp != 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[i__ + (j + 1) * a_dim1]; @@ -26562,7 +86494,7 @@ L80: for (j = *n - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { + if (ctemp != 1.f || stemp != 0.f) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[i__ + (j + 1) * a_dim1]; @@ -26582,7 +86514,7 @@ L80: for (j = 2; j <= i__1; ++j) { ctemp = c__[j - 1]; stemp = s[j - 1]; - if (ctemp != 1. || stemp != 0.) { + if (ctemp != 1.f || stemp != 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[i__ + j * a_dim1]; @@ -26599,7 +86531,7 @@ L80: for (j = *n; j >= 2; --j) { ctemp = c__[j - 1]; stemp = s[j - 1]; - if (ctemp != 1. || stemp != 0.) { + if (ctemp != 1.f || stemp != 0.f) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[i__ + j * a_dim1]; @@ -26619,7 +86551,7 @@ L80: for (j = 1; j <= i__1; ++j) { ctemp = c__[j]; stemp = s[j]; - if (ctemp != 1. || stemp != 0.) { + if (ctemp != 1.f || stemp != 0.f) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { temp = a[i__ + j * a_dim1]; @@ -26636,7 +86568,7 @@ L80: for (j = *n - 1; j >= 1; --j) { ctemp = c__[j]; stemp = s[j]; - if (ctemp != 1. || stemp != 0.) 
{ + if (ctemp != 1.f || stemp != 0.f) { i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { temp = a[i__ + j * a_dim1]; @@ -26655,25 +86587,24 @@ L80: return 0; -/* End of DLASR */ +/* End of SLASR */ -} /* dlasr_ */ +} /* slasr_ */ -/* Subroutine */ int dlasrt_(char *id, integer *n, doublereal *d__, integer * - info) +/* Subroutine */ int slasrt_(char *id, integer *n, real *d__, integer *info) { /* System generated locals */ integer i__1, i__2; /* Local variables */ static integer i__, j; - static doublereal d1, d2, d3; + static real d1, d2, d3; static integer dir; - static doublereal tmp; + static real tmp; static integer endd; extern logical lsame_(char *, char *); static integer stack[64] /* was [2][32] */; - static doublereal dmnmx; + static real dmnmx; static integer start; extern /* Subroutine */ int xerbla_(char *, integer *); static integer stkpnt; @@ -26705,7 +86636,7 @@ L80: N (input) INTEGER The length of the array D. - D (input/output) DOUBLE PRECISION array, dimension (N) + D (input/output) REAL array, dimension (N) On entry, the array to be sorted. On exit, D has been sorted into increasing order (D(1) <= ... 
<= D(N) ) or into decreasing order @@ -26739,7 +86670,7 @@ L80: } if (*info != 0) { i__1 = -(*info); - xerbla_("DLASRT", &i__1); + xerbla_("SLASRT", &i__1); return 0; } @@ -26917,20 +86848,20 @@ L110: } return 0; -/* End of DLASRT */ +/* End of SLASRT */ -} /* dlasrt_ */ +} /* slasrt_ */ -/* Subroutine */ int dlassq_(integer *n, doublereal *x, integer *incx, - doublereal *scale, doublereal *sumsq) +/* Subroutine */ int slassq_(integer *n, real *x, integer *incx, real *scale, + real *sumsq) { /* System generated locals */ integer i__1, i__2; - doublereal d__1; + real r__1; /* Local variables */ static integer ix; - static doublereal absxi; + static real absxi; /* @@ -26943,7 +86874,7 @@ L110: Purpose ======= - DLASSQ returns the values scl and smsq such that + SLASSQ returns the values scl and smsq such that ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, @@ -26963,7 +86894,7 @@ L110: N (input) INTEGER The number of elements to be used from the vector X. - X (input) DOUBLE PRECISION array, dimension (N) + X (input) REAL array, dimension (N) The vector for which a scaled sum of squares is computed. x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. @@ -26971,12 +86902,12 @@ L110: The increment between successive values of the vector X. INCX > 0. - SCALE (input/output) DOUBLE PRECISION + SCALE (input/output) REAL On entry, the value scale in the equation above. On exit, SCALE is overwritten with scl , the scaling factor for the sum of squares. - SUMSQ (input/output) DOUBLE PRECISION + SUMSQ (input/output) REAL On entry, the value sumsq in the equation above. On exit, SUMSQ is overwritten with smsq , the basic sum of squares from which scl has been factored out. @@ -26993,17 +86924,17 @@ L110: i__1 = (*n - 1) * *incx + 1; i__2 = *incx; for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) { - if (x[ix] != 0.) 
{ - absxi = (d__1 = x[ix], abs(d__1)); + if (x[ix] != 0.f) { + absxi = (r__1 = x[ix], dabs(r__1)); if (*scale < absxi) { /* Computing 2nd power */ - d__1 = *scale / absxi; - *sumsq = *sumsq * (d__1 * d__1) + 1; + r__1 = *scale / absxi; + *sumsq = *sumsq * (r__1 * r__1) + 1; *scale = absxi; } else { /* Computing 2nd power */ - d__1 = absxi / *scale; - *sumsq += d__1 * d__1; + r__1 = absxi / *scale; + *sumsq += r__1 * r__1; } } /* L10: */ @@ -27011,29 +86942,28 @@ L110: } return 0; -/* End of DLASSQ */ +/* End of SLASSQ */ -} /* dlassq_ */ +} /* slassq_ */ -/* Subroutine */ int dlasv2_(doublereal *f, doublereal *g, doublereal *h__, - doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal * - csr, doublereal *snl, doublereal *csl) +/* Subroutine */ int slasv2_(real *f, real *g, real *h__, real *ssmin, real * + ssmax, real *snr, real *csr, real *snl, real *csl) { /* System generated locals */ - doublereal d__1; + real r__1; /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); + double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ - static doublereal a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, - clt, crt, slt, srt; + static real a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, clt, + crt, slt, srt; static integer pmax; - static doublereal temp; + static real temp; static logical swap; - static doublereal tsign; - + static real tsign; static logical gasmal; + extern doublereal slamch_(char *); /* @@ -27046,7 +86976,7 @@ L110: Purpose ======= - DLASV2 computes the singular value decomposition of a 2-by-2 + SLASV2 computes the singular value decomposition of a 2-by-2 triangular matrix [ F G ] [ 0 H ]. @@ -27060,28 +86990,28 @@ L110: Arguments ========= - F (input) DOUBLE PRECISION + F (input) REAL The (1,1) element of the 2-by-2 matrix. - G (input) DOUBLE PRECISION + G (input) REAL The (1,2) element of the 2-by-2 matrix. 
- H (input) DOUBLE PRECISION + H (input) REAL The (2,2) element of the 2-by-2 matrix. - SSMIN (output) DOUBLE PRECISION + SSMIN (output) REAL abs(SSMIN) is the smaller singular value. - SSMAX (output) DOUBLE PRECISION + SSMAX (output) REAL abs(SSMAX) is the larger singular value. - SNL (output) DOUBLE PRECISION - CSL (output) DOUBLE PRECISION + SNL (output) REAL + CSL (output) REAL The vector (CSL, SNL) is a unit left singular vector for the singular value abs(SSMAX). - SNR (output) DOUBLE PRECISION - CSR (output) DOUBLE PRECISION + SNR (output) REAL + CSR (output) REAL The vector (CSR, SNR) is a unit right singular vector for the singular value abs(SSMAX). @@ -27111,9 +87041,9 @@ L110: ft = *f; - fa = abs(ft); + fa = dabs(ft); ht = *h__; - ha = abs(*h__); + ha = dabs(*h__); /* PMAX points to the maximum absolute element of matrix @@ -27137,35 +87067,35 @@ L110: } gt = *g; - ga = abs(gt); - if (ga == 0.) { + ga = dabs(gt); + if (ga == 0.f) { /* Diagonal matrix */ *ssmin = ha; *ssmax = fa; - clt = 1.; - crt = 1.; - slt = 0.; - srt = 0.; + clt = 1.f; + crt = 1.f; + slt = 0.f; + srt = 0.f; } else { gasmal = TRUE_; if (ga > fa) { pmax = 2; - if (fa / ga < EPSILON) { + if (fa / ga < slamch_("EPS")) { /* Case of very large GA */ gasmal = FALSE_; *ssmax = ga; - if (ha > 1.) { + if (ha > 1.f) { *ssmin = fa / (ga / ha); } else { *ssmin = fa / ga * ha; } - clt = 1.; + clt = 1.f; slt = ht / gt; - srt = 1.; + srt = 1.f; crt = ft / gt; } } @@ -27178,7 +87108,7 @@ L110: /* Copes with infinite F or H */ - l = 1.; + l = 1.f; } else { l = d__ / fa; } @@ -27189,7 +87119,7 @@ L110: /* Note that abs(M) .le. 1/macheps */ - t = 2. - l; + t = 2.f - l; /* Note that T .ge. 1 */ @@ -27199,34 +87129,34 @@ L110: /* Note that 1 .le. S .le. 1 + 1/macheps */ - if (l == 0.) { - r__ = abs(m); + if (l == 0.f) { + r__ = dabs(m); } else { r__ = sqrt(l * l + mm); } /* Note that 0 .le. R .le. 1 + 1/macheps */ - a = (s + r__) * .5; + a = (s + r__) * .5f; /* Note that 1 .le. A .le. 
1 + abs(M) */ *ssmin = ha / a; *ssmax = fa * a; - if (mm == 0.) { + if (mm == 0.f) { /* Note that M is very tiny */ - if (l == 0.) { - t = d_sign(&c_b2804, &ft) * d_sign(&c_b15, >); + if (l == 0.f) { + t = r_sign(&c_b8920, &ft) * r_sign(&c_b871, >); } else { - t = gt / d_sign(&d__, &ft) + m / t; + t = gt / r_sign(&d__, &ft) + m / t; } } else { - t = (m / (s + t) + m / (r__ + l)) * (a + 1.); + t = (m / (s + t) + m / (r__ + l)) * (a + 1.f); } - l = sqrt(t * t + 4.); - crt = 2. / l; + l = sqrt(t * t + 4.f); + crt = 2.f / l; srt = t / l; clt = (crt + srt * m) / a; slt = ht / ft * srt / a; @@ -27247,33 +87177,35 @@ L110: /* Correct signs of SSMAX and SSMIN */ if (pmax == 1) { - tsign = d_sign(&c_b15, csr) * d_sign(&c_b15, csl) * d_sign(&c_b15, f); + tsign = r_sign(&c_b871, csr) * r_sign(&c_b871, csl) * r_sign(&c_b871, + f); } if (pmax == 2) { - tsign = d_sign(&c_b15, snr) * d_sign(&c_b15, csl) * d_sign(&c_b15, g); + tsign = r_sign(&c_b871, snr) * r_sign(&c_b871, csl) * r_sign(&c_b871, + g); } if (pmax == 3) { - tsign = d_sign(&c_b15, snr) * d_sign(&c_b15, snl) * d_sign(&c_b15, + tsign = r_sign(&c_b871, snr) * r_sign(&c_b871, snl) * r_sign(&c_b871, h__); } - *ssmax = d_sign(ssmax, &tsign); - d__1 = tsign * d_sign(&c_b15, f) * d_sign(&c_b15, h__); - *ssmin = d_sign(ssmin, &d__1); + *ssmax = r_sign(ssmax, &tsign); + r__1 = tsign * r_sign(&c_b871, f) * r_sign(&c_b871, h__); + *ssmin = r_sign(ssmin, &r__1); return 0; -/* End of DLASV2 */ +/* End of SLASV2 */ -} /* dlasv2_ */ +} /* slasv2_ */ -/* Subroutine */ int dlaswp_(integer *n, doublereal *a, integer *lda, integer - *k1, integer *k2, integer *ipiv, integer *incx) +/* Subroutine */ int slaswp_(integer *n, real *a, integer *lda, integer *k1, + integer *k2, integer *ipiv, integer *incx) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc; - static doublereal temp; + static real temp; /* @@ -27286,7 +87218,7 @@ L110: 
Purpose ======= - DLASWP performs a series of row interchanges on the matrix A. + SLASWP performs a series of row interchanges on the matrix A. One row interchange is initiated for each of rows K1 through K2 of A. Arguments @@ -27295,7 +87227,7 @@ L110: N (input) INTEGER The number of columns of the matrix A. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) On entry, the matrix of column dimension N to which the row interchanges will be applied. On exit, the permuted matrix. @@ -27401,32 +87333,28 @@ L110: return 0; -/* End of DLASWP */ +/* End of SLASWP */ -} /* dlaswp_ */ +} /* slaswp_ */ -/* Subroutine */ int dlatrd_(char *uplo, integer *n, integer *nb, doublereal * - a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, - integer *ldw) +/* Subroutine */ int slatrd_(char *uplo, integer *n, integer *nb, real *a, + integer *lda, real *e, real *tau, real *w, integer *ldw) { /* System generated locals */ integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, iw; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - static doublereal alpha; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + static real alpha; extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), daxpy_(integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *), - dsymv_(char *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), dlarfg_(integer *, doublereal *, doublereal *, integer *, - doublereal *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char 
*, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *), saxpy_( + integer *, real *, real *, integer *, real *, integer *), ssymv_( + char *, integer *, real *, real *, integer *, real *, integer *, + real *, real *, integer *), slarfg_(integer *, real *, + real *, integer *, real *); /* @@ -27439,17 +87367,17 @@ L110: Purpose ======= - DLATRD reduces NB rows and columns of a real symmetric matrix A to + SLATRD reduces NB rows and columns of a real symmetric matrix A to symmetric tridiagonal form by an orthogonal similarity transformation Q' * A * Q, and returns the matrices V and W which are needed to apply the transformation to the unreduced part of A. - If UPLO = 'U', DLATRD reduces the last NB rows and columns of a + If UPLO = 'U', SLATRD reduces the last NB rows and columns of a matrix, of which the upper triangle is supplied; - if UPLO = 'L', DLATRD reduces the first NB rows and columns of a + if UPLO = 'L', SLATRD reduces the first NB rows and columns of a matrix, of which the lower triangle is supplied. - This is an auxiliary routine called by DSYTRD. + This is an auxiliary routine called by SSYTRD. Arguments ========= @@ -27466,7 +87394,7 @@ L110: NB (input) INTEGER The number of rows and columns to be reduced. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading n-by-n upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower @@ -27490,18 +87418,18 @@ L110: LDA (input) INTEGER The leading dimension of the array A. LDA >= (1,N). - E (output) DOUBLE PRECISION array, dimension (N-1) + E (output) REAL array, dimension (N-1) If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal elements of the last NB columns of the reduced matrix; if UPLO = 'L', E(1:nb) contains the subdiagonal elements of the first NB columns of the reduced matrix. 
- TAU (output) DOUBLE PRECISION array, dimension (N-1) + TAU (output) REAL array, dimension (N-1) The scalar factors of the elementary reflectors, stored in TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. See Further Details. - W (output) DOUBLE PRECISION array, dimension (LDW,NB) + W (output) REAL array, dimension (LDW,NB) The n-by-nb matrix W required to update the unreduced part of A. @@ -27590,13 +87518,13 @@ L110: /* Update A(1:i,i) */ i__2 = *n - i__; - dgemv_("No transpose", &i__, &i__2, &c_b151, &a[(i__ + 1) * + sgemv_("No transpose", &i__, &i__2, &c_b1150, &a[(i__ + 1) * a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & - c_b15, &a[i__ * a_dim1 + 1], &c__1); + c_b871, &a[i__ * a_dim1 + 1], &c__1); i__2 = *n - i__; - dgemv_("No transpose", &i__, &i__2, &c_b151, &w[(iw + 1) * + sgemv_("No transpose", &i__, &i__2, &c_b1150, &w[(iw + 1) * w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b15, &a[i__ * a_dim1 + 1], &c__1); + c_b871, &a[i__ * a_dim1 + 1], &c__1); } if (i__ > 1) { @@ -27606,46 +87534,46 @@ L110: */ i__2 = i__ - 1; - dlarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + + slarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 + 1], &c__1, &tau[i__ - 1]); e[i__ - 1] = a[i__ - 1 + i__ * a_dim1]; - a[i__ - 1 + i__ * a_dim1] = 1.; + a[i__ - 1 + i__ * a_dim1] = 1.f; /* Compute W(1:i-1,i) */ i__2 = i__ - 1; - dsymv_("Upper", &i__2, &c_b15, &a[a_offset], lda, &a[i__ * - a_dim1 + 1], &c__1, &c_b29, &w[iw * w_dim1 + 1], & + ssymv_("Upper", &i__2, &c_b871, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b1101, &w[iw * w_dim1 + 1], & c__1); if (i__ < *n) { i__2 = i__ - 1; i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b15, &w[(iw + 1) * + sgemv_("Transpose", &i__2, &i__3, &c_b871, &w[(iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, & - c_b29, &w[i__ + 1 + iw * w_dim1], &c__1); + c_b1101, &w[i__ + 1 + iw * w_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, 
&c_b151, &a[(i__ + 1) - * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b15, &w[iw * w_dim1 + 1], &c__1); + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[(i__ + + 1) * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], + &c__1, &c_b871, &w[iw * w_dim1 + 1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; - dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[(i__ + 1) * + sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[(i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, & - c_b29, &w[i__ + 1 + iw * w_dim1], &c__1); + c_b1101, &w[i__ + 1 + iw * w_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &w[(iw + 1) - * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b15, &w[iw * w_dim1 + 1], &c__1); + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &w[(iw + 1) + * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & + c__1, &c_b871, &w[iw * w_dim1 + 1], &c__1); } i__2 = i__ - 1; - dscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); + sscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); i__2 = i__ - 1; - alpha = tau[i__ - 1] * -.5 * ddot_(&i__2, &w[iw * w_dim1 + 1], - &c__1, &a[i__ * a_dim1 + 1], &c__1); + alpha = tau[i__ - 1] * -.5f * sdot_(&i__2, &w[iw * w_dim1 + 1] + , &c__1, &a[i__ * a_dim1 + 1], &c__1); i__2 = i__ - 1; - daxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * + saxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw * w_dim1 + 1], &c__1); } @@ -27662,14 +87590,14 @@ L110: i__2 = *n - i__ + 1; i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + a_dim1], - lda, &w[i__ + w_dim1], ldw, &c_b15, &a[i__ + i__ * a_dim1] - , &c__1); + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[i__ + a_dim1], + lda, &w[i__ + w_dim1], ldw, &c_b871, &a[i__ + i__ * + a_dim1], &c__1); i__2 = *n - i__ + 1; i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &w[i__ + w_dim1], - ldw, &a[i__ + a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1] - , &c__1); + sgemv_("No transpose", &i__2, 
&i__3, &c_b1150, &w[i__ + w_dim1], + ldw, &a[i__ + a_dim1], lda, &c_b871, &a[i__ + i__ * + a_dim1], &c__1); if (i__ < *n) { /* @@ -27680,44 +87608,45 @@ L110: i__2 = *n - i__; /* Computing MIN */ i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[i__]); e[i__] = a[i__ + 1 + i__ * a_dim1]; - a[i__ + 1 + i__ * a_dim1] = 1.; + a[i__ + 1 + i__ * a_dim1] = 1.f; /* Compute W(i+1:n,i) */ i__2 = *n - i__; - dsymv_("Lower", &i__2, &c_b15, &a[i__ + 1 + (i__ + 1) * + ssymv_("Lower", &i__2, &c_b871, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b29, &w[i__ + 1 + i__ * w_dim1], &c__1); + c_b1101, &w[i__ + 1 + i__ * w_dim1], &c__1) + ; i__2 = *n - i__; i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b15, &w[i__ + 1 + w_dim1] - , ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &w[ - i__ * w_dim1 + 1], &c__1); + sgemv_("Transpose", &i__2, &i__3, &c_b871, &w[i__ + 1 + + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b1101, &w[i__ * w_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[i__ + 1 + - a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b15, &w[ - i__ + 1 + i__ * w_dim1], &c__1); + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[i__ + 1 + + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b871, & + w[i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; - dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[i__ + 1 + a_dim1] - , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &w[ - i__ * w_dim1 + 1], &c__1); + sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[i__ + 1 + + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + c_b1101, &w[i__ * w_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &w[i__ + 1 + - w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b15, &w[ - i__ + 1 + i__ * w_dim1], &c__1); + sgemv_("No 
transpose", &i__2, &i__3, &c_b1150, &w[i__ + 1 + + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b871, & + w[i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; - dscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); + sscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; - alpha = tau[i__] * -.5 * ddot_(&i__2, &w[i__ + 1 + i__ * + alpha = tau[i__] * -.5f * sdot_(&i__2, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); i__2 = *n - i__; - daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ + saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[ i__ + 1 + i__ * w_dim1], &c__1); } @@ -27727,22 +87656,356 @@ L110: return 0; -/* End of DLATRD */ +/* End of SLATRD */ -} /* dlatrd_ */ +} /* slatrd_ */ -/* Subroutine */ int dorg2r_(integer *m, integer *n, integer *k, doublereal * - a, integer *lda, doublereal *tau, doublereal *work, integer *info) +/* Subroutine */ int slauu2_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__; + static real aii; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLAUU2 computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. 
+ + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the unblocked form of the algorithm, calling Level 2 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAUU2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the product U * U'. 
*/ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1], + lda, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + sgemv_("No transpose", &i__2, &i__3, &c_b871, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + aii, &a[i__ * a_dim1 + 1], &c__1); + } else { + sscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } +/* L10: */ + } + + } else { + +/* Compute the product L' * L. */ + + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1], & + c__1, &a[i__ + i__ * a_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + sgemv_("Transpose", &i__2, &i__3, &c_b871, &a[i__ + 1 + + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, + &a[i__ + a_dim1], lda); + } else { + sscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } +/* L20: */ + } + } + + return 0; + +/* End of SLAUU2 */ + +} /* slauu2_ */ + +/* Subroutine */ int slauum_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer i__, ib, nb; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static logical upper; + extern /* Subroutine */ int strmm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), ssyrk_(char *, char *, integer + *, integer *, real *, real *, integer *, real *, real *, integer * + ), slauu2_(char *, integer *, real *, integer *, + integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, 
ftnlen, ftnlen); + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SLAUUM computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. + + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. + + This is the blocked form of the algorithm, calling Level 3 BLAS. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the triangular factor U or L. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! 
lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SLAUUM", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "SLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + slauu2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute the product U * U'. */ + + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + strmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, + &c_b871, &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 + + 1], lda) + ; + slauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + sgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, & + c_b871, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + + (i__ + ib) * a_dim1], lda, &c_b871, &a[i__ * + a_dim1 + 1], lda); + i__3 = *n - i__ - ib + 1; + ssyrk_("Upper", "No transpose", &ib, &i__3, &c_b871, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b871, &a[i__ + + i__ * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the product L' * L. */ + + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + strmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, & + c_b871, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], + lda); + slauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + sgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, & + c_b871, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + + ib + a_dim1], lda, &c_b871, &a[i__ + a_dim1], + lda); + i__3 = *n - i__ - ib + 1; + ssyrk_("Lower", "Transpose", &ib, &i__3, &c_b871, &a[i__ + + ib + i__ * a_dim1], lda, &c_b871, &a[i__ + i__ * + a_dim1], lda); + } +/* L20: */ + } + } + } + + return 0; + +/* End of SLAUUM */ + +} /* slauum_ */ + +/* Subroutine */ int sorg2r_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; - doublereal d__1; + real r__1; /* Local variables */ static integer i__, j, l; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dlarf_(char *, integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + slarf_(char *, integer *, integer *, real *, integer *, real *, + real *, integer *, real *), xerbla_(char *, integer *); /* @@ -27755,13 +88018,13 @@ L110: Purpose ======= - DORG2R generates an m by n real matrix Q with orthonormal columns, + SORG2R generates an m by n real matrix Q with orthonormal columns, which is defined as the first n columns of a product of k elementary reflectors of order m Q = H(1) H(2) . . . H(k) - as returned by DGEQRF. + as returned by SGEQRF. 
Arguments ========= @@ -27776,21 +88039,21 @@ L110: The number of elementary reflectors whose product defines the matrix Q. N >= K >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) On entry, the i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by DGEQRF in the first k columns of its array + returned by SGEQRF in the first k columns of its array argument A. On exit, the m-by-n matrix Q. LDA (input) INTEGER The first dimension of the array A. LDA >= max(1,M). - TAU (input) DOUBLE PRECISION array, dimension (K) + TAU (input) REAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQRF. + reflector H(i), as returned by SGEQRF. - WORK (workspace) DOUBLE PRECISION array, dimension (N) + WORK (workspace) REAL array, dimension (N) INFO (output) INTEGER = 0: successful exit @@ -27822,7 +88085,7 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("DORG2R", &i__1); + xerbla_("SORG2R", &i__1); return 0; } @@ -27838,10 +88101,10 @@ L110: for (j = *k + 1; j <= i__1; ++j) { i__2 = *m; for (l = 1; l <= i__2; ++l) { - a[l + j * a_dim1] = 0.; + a[l + j * a_dim1] = 0.f; /* L10: */ } - a[j + j * a_dim1] = 1.; + a[j + j * a_dim1] = 1.f; /* L20: */ } @@ -27850,37 +88113,37 @@ L110: /* Apply H(i) to A(i:m,i:n) from the left */ if (i__ < *n) { - a[i__ + i__ * a_dim1] = 1.; + a[i__ + i__ * a_dim1] = 1.f; i__1 = *m - i__ + 1; i__2 = *n - i__; - dlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ + slarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[ i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); } if (i__ < *m) { i__1 = *m - i__; - d__1 = -tau[i__]; - dscal_(&i__1, &d__1, &a[i__ + 1 + i__ * a_dim1], &c__1); + r__1 = -tau[i__]; + sscal_(&i__1, &r__1, &a[i__ + 1 + i__ * a_dim1], &c__1); } - a[i__ + i__ * a_dim1] = 1. 
- tau[i__]; + a[i__ + i__ * a_dim1] = 1.f - tau[i__]; /* Set A(1:i-1,i) to zero */ i__1 = i__ - 1; for (l = 1; l <= i__1; ++l) { - a[l + i__ * a_dim1] = 0.; + a[l + i__ * a_dim1] = 0.f; /* L30: */ } /* L40: */ } return 0; -/* End of DORG2R */ +/* End of SORG2R */ -} /* dorg2r_ */ +} /* sorg2r_ */ -/* Subroutine */ int dorgbr_(char *vect, integer *m, integer *n, integer *k, - doublereal *a, integer *lda, doublereal *tau, doublereal *work, - integer *lwork, integer *info) +/* Subroutine */ int sorgbr_(char *vect, integer *m, integer *n, integer *k, + real *a, integer *lda, real *tau, real *work, integer *lwork, integer + *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; @@ -27893,10 +88156,10 @@ L110: extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dorglq_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *), dorgqr_(integer *, integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, integer *); + extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *), sorgqr_( + integer *, integer *, integer *, real *, integer *, real *, real * + , integer *, integer *); static integer lwkopt; static logical lquery; @@ -27911,23 +88174,23 @@ L110: Purpose ======= - DORGBR generates one of the real orthogonal matrices Q or P**T - determined by DGEBRD when reducing a real matrix A to bidiagonal + SORGBR generates one of the real orthogonal matrices Q or P**T + determined by SGEBRD when reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and P**T are defined as products of elementary reflectors H(i) or G(i) respectively. If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q is of order M: - if m >= k, Q = H(1) H(2) . . . 
H(k) and DORGBR returns the first n + if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n columns of Q, where m >= n >= k; - if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an + if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an M-by-M matrix. If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T is of order N: - if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m + if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m rows of P**T, where n >= m >= k; - if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as + if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as an N-by-N matrix. Arguments @@ -27935,7 +88198,7 @@ L110: VECT (input) CHARACTER*1 Specifies whether the matrix Q or the matrix P**T is - required, as defined in the transformation applied by DGEBRD: + required, as defined in the transformation applied by SGEBRD: = 'Q': generate Q; = 'P': generate P**T. @@ -27951,27 +88214,27 @@ L110: K (input) INTEGER If VECT = 'Q', the number of columns in the original M-by-K - matrix reduced by DGEBRD. + matrix reduced by SGEBRD. If VECT = 'P', the number of rows in the original K-by-N - matrix reduced by DGEBRD. + matrix reduced by SGEBRD. K >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) On entry, the vectors which define the elementary reflectors, - as returned by DGEBRD. + as returned by SGEBRD. On exit, the M-by-N matrix Q or P**T. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). - TAU (input) DOUBLE PRECISION array, dimension + TAU (input) REAL array, dimension (min(M,K)) if VECT = 'Q' (min(N,K)) if VECT = 'P' TAU(i) must contain the scalar factor of the elementary reflector H(i) or G(i), which determines Q or P**T, as - returned by DGEBRD in its array argument TAUQ or TAUP. + returned by SGEBRD in its array argument TAUQ or TAUP. 
- WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -28023,19 +88286,19 @@ L110: if (*info == 0) { if (wantq) { - nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( + nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, ( ftnlen)1); } else { - nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( + nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, ( ftnlen)1); } lwkopt = max(1,mn) * nb; - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("DORGBR", &i__1); + xerbla_("SORGBR", &i__1); return 0; } else if (lquery) { return 0; @@ -28044,14 +88307,14 @@ L110: /* Quick return if possible */ if (*m == 0 || *n == 0) { - work[1] = 1.; + work[1] = 1.f; return 0; } if (wantq) { /* - Form Q, determined by a call to DGEBRD to reduce an m-by-k + Form Q, determined by a call to SGEBRD to reduce an m-by-k matrix */ @@ -28059,7 +88322,7 @@ L110: /* If m >= k, assume m >= n >= k */ - dorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + sorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & iinfo); } else { @@ -28073,7 +88336,7 @@ L110: */ for (j = *m; j >= 2; --j) { - a[j * a_dim1 + 1] = 0.; + a[j * a_dim1 + 1] = 0.f; i__1 = *m; for (i__ = j + 1; i__ <= i__1; ++i__) { a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1]; @@ -28081,10 +88344,10 @@ L110: } /* L20: */ } - a[a_dim1 + 1] = 1.; + a[a_dim1 + 1] = 1.f; i__1 = *m; for (i__ = 2; i__ <= i__1; ++i__) { - a[i__ + a_dim1] = 0.; + a[i__ + a_dim1] = 0.f; /* L30: */ } if (*m > 1) { @@ -28094,14 +88357,14 @@ L110: i__1 = *m - 1; i__2 = *m - 1; i__3 = *m - 1; - dorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + sorgqr_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ 1], &work[1], lwork, &iinfo); } } } else { /* - Form P', determined by a call to 
DGEBRD to reduce a k-by-n + Form P', determined by a call to SGEBRD to reduce a k-by-n matrix */ @@ -28109,7 +88372,7 @@ L110: /* If k < n, assume k <= m <= n */ - dorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & + sorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, & iinfo); } else { @@ -28122,10 +88385,10 @@ L110: those of the unit matrix */ - a[a_dim1 + 1] = 1.; + a[a_dim1 + 1] = 1.f; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { - a[i__ + a_dim1] = 0.; + a[i__ + a_dim1] = 0.f; /* L40: */ } i__1 = *n; @@ -28134,7 +88397,7 @@ L110: a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1]; /* L50: */ } - a[j * a_dim1 + 1] = 0.; + a[j * a_dim1 + 1] = 0.f; /* L60: */ } if (*n > 1) { @@ -28144,21 +88407,20 @@ L110: i__1 = *n - 1; i__2 = *n - 1; i__3 = *n - 1; - dorglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ + sorglq_(&i__1, &i__2, &i__3, &a[(a_dim1 << 1) + 2], lda, &tau[ 1], &work[1], lwork, &iinfo); } } } - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; return 0; -/* End of DORGBR */ +/* End of SORGBR */ -} /* dorgbr_ */ +} /* sorgbr_ */ -/* Subroutine */ int dorghr_(integer *n, integer *ilo, integer *ihi, - doublereal *a, integer *lda, doublereal *tau, doublereal *work, - integer *lwork, integer *info) +/* Subroutine */ int sorghr_(integer *n, integer *ilo, integer *ihi, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; @@ -28168,9 +88430,8 @@ L110: extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dorgqr_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - integer *); + extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); static integer lwkopt; static logical 
lquery; @@ -28185,9 +88446,9 @@ L110: Purpose ======= - DORGHR generates a real orthogonal matrix Q which is defined as the + SORGHR generates a real orthogonal matrix Q which is defined as the product of IHI-ILO elementary reflectors of order N, as returned by - DGEHRD: + SGEHRD: Q = H(ilo) H(ilo+1) . . . H(ihi-1). @@ -28200,23 +88461,23 @@ L110: ILO (input) INTEGER IHI (input) INTEGER ILO and IHI must have the same values as in the previous call - of DGEHRD. Q is equal to the unit matrix except in the + of SGEHRD. Q is equal to the unit matrix except in the submatrix Q(ilo+1:ihi,ilo+1:ihi). 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) On entry, the vectors which define the elementary reflectors, - as returned by DGEHRD. + as returned by SGEHRD. On exit, the N-by-N orthogonal matrix Q. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). - TAU (input) DOUBLE PRECISION array, dimension (N-1) + TAU (input) REAL array, dimension (N-1) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEHRD. + reflector H(i), as returned by SGEHRD. - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -28263,15 +88524,15 @@ L110: } if (*info == 0) { - nb = ilaenv_(&c__1, "DORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( + nb = ilaenv_(&c__1, "SORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, ( ftnlen)1); lwkopt = max(1,nh) * nb; - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("DORGHR", &i__1); + xerbla_("SORGHR", &i__1); return 0; } else if (lquery) { return 0; @@ -28280,7 +88541,7 @@ L110: /* Quick return if possible */ if (*n == 0) { - work[1] = 1.; + work[1] = 1.f; return 0; } @@ -28294,7 +88555,7 @@ L110: for (j = *ihi; j >= i__1; --j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; + a[i__ + j * a_dim1] = 0.f; /* L10: */ } i__2 = *ihi; @@ -28304,7 +88565,7 @@ L110: } i__2 = *n; for (i__ = *ihi + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; + a[i__ + j * a_dim1] = 0.f; /* L30: */ } /* L40: */ @@ -28313,20 +88574,20 @@ L110: for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; + a[i__ + j * a_dim1] = 0.f; /* L50: */ } - a[j + j * a_dim1] = 1.; + a[j + j * a_dim1] = 1.f; /* L60: */ } i__1 = *n; for (j = *ihi + 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; + a[i__ + j * a_dim1] = 0.f; /* L70: */ } - a[j + j * a_dim1] = 1.; + a[j + j * a_dim1] = 1.f; /* L80: */ } @@ -28334,28 +88595,28 @@ L110: /* Generate Q(ilo+1:ihi,ilo+1:ihi) */ - dorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* + sorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[* ilo], &work[1], lwork, &iinfo); } - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; return 0; -/* End of DORGHR */ +/* End of SORGHR */ -} /* dorghr_ */ +} /* sorghr_ */ -/* Subroutine */ int dorgl2_(integer *m, integer *n, integer *k, doublereal * - a, integer *lda, doublereal *tau, doublereal *work, integer *info) +/* Subroutine */ int sorgl2_(integer *m, 
integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; - doublereal d__1; + real r__1; /* Local variables */ static integer i__, j, l; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dlarf_(char *, integer *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + slarf_(char *, integer *, integer *, real *, integer *, real *, + real *, integer *, real *), xerbla_(char *, integer *); /* @@ -28368,13 +88629,13 @@ L110: Purpose ======= - DORGL2 generates an m by n real matrix Q with orthonormal rows, + SORGL2 generates an m by n real matrix Q with orthonormal rows, which is defined as the first m rows of a product of k elementary reflectors of order n Q = H(k) . . . H(2) H(1) - as returned by DGELQF. + as returned by SGELQF. Arguments ========= @@ -28389,20 +88650,20 @@ L110: The number of elementary reflectors whose product defines the matrix Q. M >= K >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) On entry, the i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned - by DGELQF in the first k rows of its array argument A. + by SGELQF in the first k rows of its array argument A. On exit, the m-by-n matrix Q. LDA (input) INTEGER The first dimension of the array A. LDA >= max(1,M). - TAU (input) DOUBLE PRECISION array, dimension (K) + TAU (input) REAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGELQF. + reflector H(i), as returned by SGELQF. 
- WORK (workspace) DOUBLE PRECISION array, dimension (M) + WORK (workspace) REAL array, dimension (M) INFO (output) INTEGER = 0: successful exit @@ -28434,7 +88695,7 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("DORGL2", &i__1); + xerbla_("SORGL2", &i__1); return 0; } @@ -28452,11 +88713,11 @@ L110: for (j = 1; j <= i__1; ++j) { i__2 = *m; for (l = *k + 1; l <= i__2; ++l) { - a[l + j * a_dim1] = 0.; + a[l + j * a_dim1] = 0.f; /* L10: */ } if (j > *k && j <= *m) { - a[j + j * a_dim1] = 1.; + a[j + j * a_dim1] = 1.f; } /* L20: */ } @@ -28468,50 +88729,49 @@ L110: if (i__ < *n) { if (i__ < *m) { - a[i__ + i__ * a_dim1] = 1.; + a[i__ + i__ * a_dim1] = 1.f; i__1 = *m - i__; i__2 = *n - i__ + 1; - dlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & + slarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, & tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); } i__1 = *n - i__; - d__1 = -tau[i__]; - dscal_(&i__1, &d__1, &a[i__ + (i__ + 1) * a_dim1], lda); + r__1 = -tau[i__]; + sscal_(&i__1, &r__1, &a[i__ + (i__ + 1) * a_dim1], lda); } - a[i__ + i__ * a_dim1] = 1. 
- tau[i__]; + a[i__ + i__ * a_dim1] = 1.f - tau[i__]; /* Set A(i,1:i-1) to zero */ i__1 = i__ - 1; for (l = 1; l <= i__1; ++l) { - a[i__ + l * a_dim1] = 0.; + a[i__ + l * a_dim1] = 0.f; /* L30: */ } /* L40: */ } return 0; -/* End of DORGL2 */ +/* End of SORGL2 */ -} /* dorgl2_ */ +} /* sorgl2_ */ -/* Subroutine */ int dorglq_(integer *m, integer *n, integer *k, doublereal * - a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, - integer *info) +/* Subroutine */ int sorglq_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int dorgl2_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), - dlarfb_(char *, char *, char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int sorgl2_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *), slarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); static integer ldwork, lwkopt; static logical lquery; @@ -28526,13 +88786,13 @@ L110: Purpose ======= - DORGLQ generates an M-by-N real matrix Q with orthonormal rows, + SORGLQ generates an M-by-N real matrix Q with orthonormal 
rows, which is defined as the first M rows of a product of K elementary reflectors of order N Q = H(k) . . . H(2) H(1) - as returned by DGELQF. + as returned by SGELQF. Arguments ========= @@ -28547,20 +88807,20 @@ L110: The number of elementary reflectors whose product defines the matrix Q. M >= K >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) On entry, the i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned - by DGELQF in the first k rows of its array argument A. + by SGELQF in the first k rows of its array argument A. On exit, the M-by-N matrix Q. LDA (input) INTEGER The first dimension of the array A. LDA >= max(1,M). - TAU (input) DOUBLE PRECISION array, dimension (K) + TAU (input) REAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGELQF. + reflector H(i), as returned by SGELQF. - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -28592,9 +88852,9 @@ L110: /* Function Body */ *info = 0; - nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); lwkopt = max(1,*m) * nb; - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; @@ -28609,7 +88869,7 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("DORGLQ", &i__1); + xerbla_("SORGLQ", &i__1); return 0; } else if (lquery) { return 0; @@ -28618,7 +88878,7 @@ L110: /* Quick return if possible */ if (*m <= 0) { - work[1] = 1.; + work[1] = 1.f; return 0; } @@ -28632,7 +88892,7 @@ L110: Computing MAX */ - i__1 = 0, i__2 = ilaenv_(&c__3, "DORGLQ", " ", m, n, k, &c_n1, ( + i__1 = 0, i__2 = ilaenv_(&c__3, "SORGLQ", " ", m, n, k, &c_n1, ( ftnlen)6, (ftnlen)1); nx = max(i__1,i__2); if (nx < *k) { @@ -28650,7 +88910,7 @@ L110: nb = *lwork / ldwork; /* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "DORGLQ", " ", m, n, k, &c_n1, + i__1 = 2, i__2 = ilaenv_(&c__2, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); nbmin = max(i__1,i__2); } @@ -28675,7 +88935,7 @@ L110: for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = kk + 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; + a[i__ + j * a_dim1] = 0.f; /* L10: */ } /* L20: */ @@ -28690,7 +88950,7 @@ L110: i__1 = *m - kk; i__2 = *n - kk; i__3 = *k - kk; - dorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + sorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & tau[kk + 1], &work[1], &iinfo); } @@ -28711,14 +88971,14 @@ L110: */ i__2 = *n - i__ + 1; - dlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * + slarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H' to A(i+ib:m,i:n) from the right */ i__2 = *m - i__ - ib + 1; i__3 = *n - i__ + 1; - dlarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, & + slarfb_("Right", "Transpose", 
"Forward", "Rowwise", &i__2, & i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], & ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib + 1], &ldwork); @@ -28727,7 +88987,7 @@ L110: /* Apply H' to columns i:n of current block */ i__2 = *n - i__ + 1; - dorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + sorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & work[1], &iinfo); /* Set columns 1:i-1 of current block to zero */ @@ -28736,7 +88996,7 @@ L110: for (j = 1; j <= i__2; ++j) { i__3 = i__ + ib - 1; for (l = i__; l <= i__3; ++l) { - a[l + j * a_dim1] = 0.; + a[l + j * a_dim1] = 0.f; /* L30: */ } /* L40: */ @@ -28745,30 +89005,29 @@ L110: } } - work[1] = (doublereal) iws; + work[1] = (real) iws; return 0; -/* End of DORGLQ */ +/* End of SORGLQ */ -} /* dorglq_ */ +} /* sorglq_ */ -/* Subroutine */ int dorgqr_(integer *m, integer *n, integer *k, doublereal * - a, integer *lda, doublereal *tau, doublereal *work, integer *lwork, - integer *info) +/* Subroutine */ int sorgqr_(integer *m, integer *n, integer *k, real *a, + integer *lda, real *tau, real *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo; - extern /* Subroutine */ int dorg2r_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), - dlarfb_(char *, char *, char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int sorg2r_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *), slarfb_(char *, char *, + char *, char *, integer *, integer *, integer *, real *, integer * + , real *, integer *, real *, 
integer *, real *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); static integer ldwork, lwkopt; static logical lquery; @@ -28783,13 +89042,13 @@ L110: Purpose ======= - DORGQR generates an M-by-N real matrix Q with orthonormal columns, + SORGQR generates an M-by-N real matrix Q with orthonormal columns, which is defined as the first N columns of a product of K elementary reflectors of order M Q = H(1) H(2) . . . H(k) - as returned by DGEQRF. + as returned by SGEQRF. Arguments ========= @@ -28804,21 +89063,21 @@ L110: The number of elementary reflectors whose product defines the matrix Q. N >= K >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) On entry, the i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as - returned by DGEQRF in the first k columns of its array + returned by SGEQRF in the first k columns of its array argument A. On exit, the M-by-N matrix Q. LDA (input) INTEGER The first dimension of the array A. LDA >= max(1,M). - TAU (input) DOUBLE PRECISION array, dimension (K) + TAU (input) REAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQRF. + reflector H(i), as returned by SGEQRF. - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -28850,9 +89109,9 @@ L110: /* Function Body */ *info = 0; - nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); + nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); lwkopt = max(1,*n) * nb; - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; lquery = *lwork == -1; if (*m < 0) { *info = -1; @@ -28867,7 +89126,7 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("DORGQR", &i__1); + xerbla_("SORGQR", &i__1); return 0; } else if (lquery) { return 0; @@ -28876,7 +89135,7 @@ L110: /* Quick return if possible */ if (*n <= 0) { - work[1] = 1.; + work[1] = 1.f; return 0; } @@ -28890,7 +89149,7 @@ L110: Computing MAX */ - i__1 = 0, i__2 = ilaenv_(&c__3, "DORGQR", " ", m, n, k, &c_n1, ( + i__1 = 0, i__2 = ilaenv_(&c__3, "SORGQR", " ", m, n, k, &c_n1, ( ftnlen)6, (ftnlen)1); nx = max(i__1,i__2); if (nx < *k) { @@ -28908,7 +89167,7 @@ L110: nb = *lwork / ldwork; /* Computing MAX */ - i__1 = 2, i__2 = ilaenv_(&c__2, "DORGQR", " ", m, n, k, &c_n1, + i__1 = 2, i__2 = ilaenv_(&c__2, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); nbmin = max(i__1,i__2); } @@ -28933,7 +89192,7 @@ L110: for (j = kk + 1; j <= i__1; ++j) { i__2 = kk; for (i__ = 1; i__ <= i__2; ++i__) { - a[i__ + j * a_dim1] = 0.; + a[i__ + j * a_dim1] = 0.f; /* L10: */ } /* L20: */ @@ -28948,7 +89207,7 @@ L110: i__1 = *m - kk; i__2 = *n - kk; i__3 = *k - kk; - dorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & + sorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, & tau[kk + 1], &work[1], &iinfo); } @@ -28969,14 +89228,14 @@ L110: */ i__2 = *m - i__ + 1; - dlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * + slarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1], &ldwork); /* Apply H to A(i:m,i+ib:n) from the left */ i__2 = *m - i__ + 1; i__3 = *n - i__ - ib + 1; - dlarfb_("Left", "No transpose", "Forward", "Columnwise", & + slarfb_("Left", "No 
transpose", "Forward", "Columnwise", & i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[ 1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, & work[ib + 1], &ldwork); @@ -28985,7 +89244,7 @@ L110: /* Apply H to rows i:m of current block */ i__2 = *m - i__ + 1; - dorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & + sorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], & work[1], &iinfo); /* Set rows 1:i-1 of current block to zero */ @@ -28994,7 +89253,7 @@ L110: for (j = i__; j <= i__2; ++j) { i__3 = i__ - 1; for (l = 1; l <= i__3; ++l) { - a[l + j * a_dim1] = 0.; + a[l + j * a_dim1] = 0.f; /* L30: */ } /* L40: */ @@ -29003,29 +89262,28 @@ L110: } } - work[1] = (doublereal) iws; + work[1] = (real) iws; return 0; -/* End of DORGQR */ +/* End of SORGQR */ -} /* dorgqr_ */ +} /* sorgqr_ */ -/* Subroutine */ int dorm2l_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *info) +/* Subroutine */ int sorm2l_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ static integer i__, i1, i2, i3, mi, ni, nq; - static doublereal aii; + static real aii; static logical left; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *); static logical notran; @@ -29039,7 +89297,7 @@ L110: Purpose ======= - DORM2L overwrites the general real m by n matrix C with + SORM2L overwrites the general real m 
by n matrix C with Q * C if SIDE = 'L' and TRANS = 'N', or @@ -29054,7 +89312,7 @@ L110: Q = H(k) . . . H(2) H(1) - as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n + as returned by SGEQLF. Q is of order m if SIDE = 'L' and of order n if SIDE = 'R'. Arguments @@ -29080,10 +89338,10 @@ L110: If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. - A (input) DOUBLE PRECISION array, dimension (LDA,K) + A (input) REAL array, dimension (LDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by - DGEQLF in the last k columns of its array argument A. + SGEQLF in the last k columns of its array argument A. A is modified by the routine but restored on exit. LDA (input) INTEGER @@ -29091,18 +89349,18 @@ L110: If SIDE = 'L', LDA >= max(1,M); if SIDE = 'R', LDA >= max(1,N). - TAU (input) DOUBLE PRECISION array, dimension (K) + TAU (input) REAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQLF. + reflector H(i), as returned by SGEQLF. - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + C (input/output) REAL array, dimension (LDC,N) On entry, the m by n matrix C. On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). 
- WORK (workspace) DOUBLE PRECISION array, dimension + WORK (workspace) REAL array, dimension (N) if SIDE = 'L', (M) if SIDE = 'R' @@ -29155,7 +89413,7 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("DORM2L", &i__1); + xerbla_("SORM2L", &i__1); return 0; } @@ -29199,34 +89457,33 @@ L110: /* Apply H(i) */ aii = a[nq - *k + i__ + i__ * a_dim1]; - a[nq - *k + i__ + i__ * a_dim1] = 1.; - dlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ + a[nq - *k + i__ + i__ * a_dim1] = 1.f; + slarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[ c_offset], ldc, &work[1]); a[nq - *k + i__ + i__ * a_dim1] = aii; /* L10: */ } return 0; -/* End of DORM2L */ +/* End of SORM2L */ -} /* dorm2l_ */ +} /* sorm2l_ */ -/* Subroutine */ int dorm2r_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *info) +/* Subroutine */ int sorm2r_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static doublereal aii; + static real aii; static logical left; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *); static logical notran; @@ -29240,7 +89497,7 @@ L110: Purpose ======= - DORM2R overwrites the general real m by n matrix C with + SORM2R overwrites the general real m by n matrix C with Q * C if SIDE = 'L' and TRANS = 'N', or @@ -29255,7 
+89512,7 @@ L110: Q = H(1) H(2) . . . H(k) - as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n + as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n if SIDE = 'R'. Arguments @@ -29281,10 +89538,10 @@ L110: If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. - A (input) DOUBLE PRECISION array, dimension (LDA,K) + A (input) REAL array, dimension (LDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by - DGEQRF in the first k columns of its array argument A. + SGEQRF in the first k columns of its array argument A. A is modified by the routine but restored on exit. LDA (input) INTEGER @@ -29292,18 +89549,18 @@ L110: If SIDE = 'L', LDA >= max(1,M); if SIDE = 'R', LDA >= max(1,N). - TAU (input) DOUBLE PRECISION array, dimension (K) + TAU (input) REAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQRF. + reflector H(i), as returned by SGEQRF. - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + C (input/output) REAL array, dimension (LDC,N) On entry, the m by n matrix C. On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). 
- WORK (workspace) DOUBLE PRECISION array, dimension + WORK (workspace) REAL array, dimension (N) if SIDE = 'L', (M) if SIDE = 'R' @@ -29356,7 +89613,7 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("DORM2R", &i__1); + xerbla_("SORM2R", &i__1); return 0; } @@ -29404,22 +89661,21 @@ L110: /* Apply H(i) */ aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ + a[i__ + i__ * a_dim1] = 1.f; + slarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[ ic + jc * c_dim1], ldc, &work[1]); a[i__ + i__ * a_dim1] = aii; /* L10: */ } return 0; -/* End of DORM2R */ +/* End of SORM2R */ -} /* dorm2r_ */ +} /* sorm2r_ */ -/* Subroutine */ int dormbr_(char *vect, char *side, char *trans, integer *m, - integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, - doublereal *c__, integer *ldc, doublereal *work, integer *lwork, - integer *info) +/* Subroutine */ int sormbr_(char *vect, char *side, char *trans, integer *m, + integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, + integer *ldc, real *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; @@ -29437,17 +89693,16 @@ L110: extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *); - static logical notran; - extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *); - static logical applyq; + static logical notran, applyq; static char transt[1]; + extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *, + integer *, real *, 
integer *, real *, real *, integer *, real *, + integer *, integer *); static integer lwkopt; static logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); /* @@ -29460,19 +89715,19 @@ L110: Purpose ======= - If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C + If VECT = 'Q', SORMBR overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q TRANS = 'T': Q**T * C C * Q**T - If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C + If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': P * C C * P TRANS = 'T': P**T * C C * P**T - Here Q and P**T are the orthogonal matrices determined by DGEBRD when + Here Q and P**T are the orthogonal matrices determined by SGEBRD when reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and P**T are defined as products of elementary reflectors H(i) and G(i) respectively. @@ -29511,29 +89766,29 @@ L110: K (input) INTEGER If VECT = 'Q', the number of columns in the original - matrix reduced by DGEBRD. + matrix reduced by SGEBRD. If VECT = 'P', the number of rows in the original - matrix reduced by DGEBRD. + matrix reduced by SGEBRD. K >= 0. - A (input) DOUBLE PRECISION array, dimension + A (input) REAL array, dimension (LDA,min(nq,K)) if VECT = 'Q' (LDA,nq) if VECT = 'P' The vectors which define the elementary reflectors H(i) and G(i), whose products determine the matrices Q and P, as - returned by DGEBRD. + returned by SGEBRD. LDA (input) INTEGER The leading dimension of the array A. If VECT = 'Q', LDA >= max(1,nq); if VECT = 'P', LDA >= max(1,min(nq,K)). 
- TAU (input) DOUBLE PRECISION array, dimension (min(nq,K)) + TAU (input) REAL array, dimension (min(nq,K)) TAU(i) must contain the scalar factor of the elementary reflector H(i) or G(i) which determines Q or P, as returned - by DGEBRD in the array argument TAUQ or TAUP. + by SGEBRD in the array argument TAUQ or TAUP. - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + C (input/output) REAL array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q or P*C or P**T*C or C*P or C*P**T. @@ -29541,7 +89796,7 @@ L110: LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -29626,7 +89881,7 @@ L110: s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *m - 1; i__2 = *m - 1; - nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( + nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__1, n, &i__2, &c_n1, ( ftnlen)6, (ftnlen)2); } else { /* Writing concatenation */ @@ -29635,7 +89890,7 @@ L110: s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *n - 1; i__2 = *n - 1; - nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( + nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__1, &i__2, &c_n1, ( ftnlen)6, (ftnlen)2); } } else { @@ -29646,7 +89901,7 @@ L110: s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *m - 1; i__2 = *m - 1; - nb = ilaenv_(&c__1, "DORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( + nb = ilaenv_(&c__1, "SORMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( ftnlen)6, (ftnlen)2); } else { /* Writing concatenation */ @@ -29655,17 +89910,17 @@ L110: s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); i__1 = *n - 1; i__2 = *n - 1; - nb = ilaenv_(&c__1, "DORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( + nb = ilaenv_(&c__1, "SORMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( ftnlen)6, (ftnlen)2); } } lwkopt = max(1,nw) * 
nb; - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("DORMBR", &i__1); + xerbla_("SORMBR", &i__1); return 0; } else if (lquery) { return 0; @@ -29673,7 +89928,7 @@ L110: /* Quick return if possible */ - work[1] = 1.; + work[1] = 1.f; if (*m == 0 || *n == 0) { return 0; } @@ -29684,13 +89939,13 @@ L110: if (nq >= *k) { -/* Q was determined by a call to DGEBRD with nq >= k */ +/* Q was determined by a call to SGEBRD with nq >= k */ - dormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + sormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], lwork, &iinfo); } else if (nq > 1) { -/* Q was determined by a call to DGEBRD with nq < k */ +/* Q was determined by a call to SGEBRD with nq < k */ if (left) { mi = *m - 1; @@ -29704,7 +89959,7 @@ L110: i2 = 2; } i__1 = nq - 1; - dormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] + sormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1] , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); } } else { @@ -29718,13 +89973,13 @@ L110: } if (nq > *k) { -/* P was determined by a call to DGEBRD with nq > k */ +/* P was determined by a call to SGEBRD with nq > k */ - dormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + sormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], lwork, &iinfo); } else if (nq > 1) { -/* P was determined by a call to DGEBRD with nq <= k */ +/* P was determined by a call to SGEBRD with nq <= k */ if (left) { mi = *m - 1; @@ -29738,34 +89993,33 @@ L110: i2 = 2; } i__1 = nq - 1; - dormlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, + sormlq_(side, transt, &mi, &ni, &i__1, &a[(a_dim1 << 1) + 1], lda, &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, & iinfo); } } - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; return 0; -/* End of DORMBR */ +/* End of SORMBR */ -} /* dormbr_ */ +} /* sormbr_ */ -/* 
Subroutine */ int dorml2_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *info) +/* Subroutine */ int sorml2_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ static integer i__, i1, i2, i3, ic, jc, mi, ni, nq; - static doublereal aii; + static real aii; static logical left; - extern /* Subroutine */ int dlarf_(char *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int xerbla_(char *, integer *); + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), xerbla_( + char *, integer *); static logical notran; @@ -29779,7 +90033,7 @@ L110: Purpose ======= - DORML2 overwrites the general real m by n matrix C with + SORML2 overwrites the general real m by n matrix C with Q * C if SIDE = 'L' and TRANS = 'N', or @@ -29794,7 +90048,7 @@ L110: Q = H(k) . . . H(2) H(1) - as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n + as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n if SIDE = 'R'. Arguments @@ -29820,29 +90074,29 @@ L110: If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. - A (input) DOUBLE PRECISION array, dimension + A (input) REAL array, dimension (LDA,M) if SIDE = 'L', (LDA,N) if SIDE = 'R' The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by - DGELQF in the first k rows of its array argument A. + SGELQF in the first k rows of its array argument A. A is modified by the routine but restored on exit. 
LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,K). - TAU (input) DOUBLE PRECISION array, dimension (K) + TAU (input) REAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGELQF. + reflector H(i), as returned by SGELQF. - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + C (input/output) REAL array, dimension (LDC,N) On entry, the m by n matrix C. On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace) DOUBLE PRECISION array, dimension + WORK (workspace) REAL array, dimension (N) if SIDE = 'L', (M) if SIDE = 'R' @@ -29895,7 +90149,7 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("DORML2", &i__1); + xerbla_("SORML2", &i__1); return 0; } @@ -29943,21 +90197,21 @@ L110: /* Apply H(i) */ aii = a[i__ + i__ * a_dim1]; - a[i__ + i__ * a_dim1] = 1.; - dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ + a[i__ + i__ * a_dim1] = 1.f; + slarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[ ic + jc * c_dim1], ldc, &work[1]); a[i__ + i__ * a_dim1] = aii; /* L10: */ } return 0; -/* End of DORML2 */ +/* End of SORML2 */ -} /* dorml2_ */ +} /* sorml2_ */ -/* Subroutine */ int dormlq_(char *side, char *trans, integer *m, integer *n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +/* Subroutine */ int sormlq_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; @@ -29970,20 +90224,20 @@ L110: /* Local variables */ static integer i__; - static doublereal t[4160] /* was [65][64] */; + static real t[4160] /* was [65][64] */; static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; static logical 
left; extern logical lsame_(char *, char *); static integer nbmin, iinfo; - extern /* Subroutine */ int dorml2_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *), dlarfb_(char - *, char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int sorml2_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *), slarfb_(char *, char *, char *, char * + , integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); static logical notran; static integer ldwork; static char transt[1]; @@ -30001,7 +90255,7 @@ L110: Purpose ======= - DORMLQ overwrites the general real M-by-N matrix C with + SORMLQ overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q @@ -30012,7 +90266,7 @@ L110: Q = H(k) . . . H(2) H(1) - as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N + as returned by SGELQF. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. Arguments @@ -30038,29 +90292,29 @@ L110: If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. 
- A (input) DOUBLE PRECISION array, dimension + A (input) REAL array, dimension (LDA,M) if SIDE = 'L', (LDA,N) if SIDE = 'R' The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by - DGELQF in the first k rows of its array argument A. + SGELQF in the first k rows of its array argument A. A is modified by the routine but restored on exit. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,K). - TAU (input) DOUBLE PRECISION array, dimension (K) + TAU (input) REAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGELQF. + reflector H(i), as returned by SGELQF. - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + C (input/output) REAL array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -30141,16 +90395,16 @@ L110: i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "DORMLQ", ch__1, m, n, k, &c_n1, ( + i__1 = 64, i__2 = ilaenv_(&c__1, "SORMLQ", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nb = min(i__1,i__2); lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("DORMLQ", &i__1); + xerbla_("SORMLQ", &i__1); return 0; } else if (lquery) { return 0; @@ -30159,7 +90413,7 @@ L110: /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { - work[1] = 1.; + work[1] = 1.f; return 0; } @@ -30176,7 +90430,7 @@ L110: i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "DORMLQ", ch__1, m, n, k, &c_n1, ( + i__1 = 2, i__2 = ilaenv_(&c__2, "SORMLQ", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nbmin = max(i__1,i__2); } @@ -30188,7 +90442,7 @@ L110: /* Use unblocked code */ - dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + sorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { @@ -30231,7 +90485,7 @@ L110: */ i__4 = nq - i__ + 1; - dlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], + slarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], t, &c__65); if (left) { @@ -30249,22 +90503,22 @@ L110: /* Apply H or H' */ - dlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + slarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], ldc, &work[1], &ldwork); /* L10: */ } } - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; return 0; -/* End of DORMLQ */ +/* End of SORMLQ */ -} /* dormlq_ */ +} /* sormlq_ */ -/* Subroutine */ int dormql_(char *side, char *trans, integer *m, integer *n, - 
integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +/* Subroutine */ int sormql_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; @@ -30277,20 +90531,20 @@ L110: /* Local variables */ static integer i__; - static doublereal t[4160] /* was [65][64] */; + static real t[4160] /* was [65][64] */; static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws; static logical left; extern logical lsame_(char *, char *); static integer nbmin, iinfo; - extern /* Subroutine */ int dorm2l_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *), dlarfb_(char - *, char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int sorm2l_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *), slarfb_(char *, char *, char *, char * + , integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); static logical notran; static integer ldwork, lwkopt; static logical lquery; @@ -30306,7 +90560,7 @@ L110: Purpose ======= - DORMQL overwrites the general real M-by-N matrix C with + SORMQL overwrites the general real M-by-N 
matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q @@ -30317,7 +90571,7 @@ L110: Q = H(k) . . . H(2) H(1) - as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N + as returned by SGEQLF. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. Arguments @@ -30343,10 +90597,10 @@ L110: If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. - A (input) DOUBLE PRECISION array, dimension (LDA,K) + A (input) REAL array, dimension (LDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by - DGEQLF in the last k columns of its array argument A. + SGEQLF in the last k columns of its array argument A. A is modified by the routine but restored on exit. LDA (input) INTEGER @@ -30354,18 +90608,18 @@ L110: If SIDE = 'L', LDA >= max(1,M); if SIDE = 'R', LDA >= max(1,N). - TAU (input) DOUBLE PRECISION array, dimension (K) + TAU (input) REAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQLF. + reflector H(i), as returned by SGEQLF. - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + C (input/output) REAL array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -30446,16 +90700,16 @@ L110: i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQL", ch__1, m, n, k, &c_n1, ( + i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQL", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nb = min(i__1,i__2); lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("DORMQL", &i__1); + xerbla_("SORMQL", &i__1); return 0; } else if (lquery) { return 0; @@ -30464,7 +90718,7 @@ L110: /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { - work[1] = 1.; + work[1] = 1.f; return 0; } @@ -30481,7 +90735,7 @@ L110: i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQL", ch__1, m, n, k, &c_n1, ( + i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQL", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nbmin = max(i__1,i__2); } @@ -30493,7 +90747,7 @@ L110: /* Use unblocked code */ - dorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + sorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { @@ -30528,7 +90782,7 @@ L110: */ i__4 = nq - *k + i__ + ib - 1; - dlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] + slarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1] , lda, &tau[i__], t, &c__65); if (left) { @@ -30544,22 +90798,22 @@ L110: /* Apply H or H' */ - dlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ + slarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[ i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, & work[1], &ldwork); /* L10: */ } } - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; return 0; -/* End of DORMQL */ +/* End of SORMQL */ -} /* dormql_ */ +} /* sormql_ */ -/* Subroutine */ int dormqr_(char *side, char *trans, integer *m, integer 
*n, - integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +/* Subroutine */ int sormqr_(char *side, char *trans, integer *m, integer *n, + integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; @@ -30572,20 +90826,20 @@ L110: /* Local variables */ static integer i__; - static doublereal t[4160] /* was [65][64] */; + static real t[4160] /* was [65][64] */; static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws; static logical left; extern logical lsame_(char *, char *); static integer nbmin, iinfo; - extern /* Subroutine */ int dorm2r_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *), dlarfb_(char - *, char *, char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal - *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *); + extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *), slarfb_(char *, char *, char *, char * + , integer *, integer *, integer *, real *, integer *, real *, + integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *, + real *, integer *, real *, real *, integer *); static logical notran; static integer ldwork, lwkopt; static logical lquery; @@ -30601,7 +90855,7 @@ L110: Purpose ======= - DORMQR overwrites the general real M-by-N matrix C with + SORMQR overwrites the 
general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q @@ -30612,7 +90866,7 @@ L110: Q = H(1) H(2) . . . H(k) - as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N + as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. Arguments @@ -30638,10 +90892,10 @@ L110: If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. - A (input) DOUBLE PRECISION array, dimension (LDA,K) + A (input) REAL array, dimension (LDA,K) The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by - DGEQRF in the first k columns of its array argument A. + SGEQRF in the first k columns of its array argument A. A is modified by the routine but restored on exit. LDA (input) INTEGER @@ -30649,18 +90903,18 @@ L110: If SIDE = 'L', LDA >= max(1,M); if SIDE = 'R', LDA >= max(1,N). - TAU (input) DOUBLE PRECISION array, dimension (K) + TAU (input) REAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DGEQRF. + reflector H(i), as returned by SGEQRF. - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + C (input/output) REAL array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -30741,16 +90995,16 @@ L110: i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQR", ch__1, m, n, k, &c_n1, ( + i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQR", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nb = min(i__1,i__2); lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("DORMQR", &i__1); + xerbla_("SORMQR", &i__1); return 0; } else if (lquery) { return 0; @@ -30759,7 +91013,7 @@ L110: /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { - work[1] = 1.; + work[1] = 1.f; return 0; } @@ -30776,7 +91030,7 @@ L110: i__3[0] = 1, a__1[0] = side; i__3[1] = 1, a__1[1] = trans; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQR", ch__1, m, n, k, &c_n1, ( + i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQR", ch__1, m, n, k, &c_n1, ( ftnlen)6, (ftnlen)2); nbmin = max(i__1,i__2); } @@ -30788,7 +91042,7 @@ L110: /* Use unblocked code */ - dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ + sorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[ c_offset], ldc, &work[1], &iinfo); } else { @@ -30825,7 +91079,7 @@ L110: */ i__4 = nq - i__ + 1; - dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * + slarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], t, &c__65) ; if (left) { @@ -30844,22 +91098,22 @@ L110: /* Apply H or H' */ - dlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ + slarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[ i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1], ldc, &work[1], &ldwork); /* L10: */ } } - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; return 0; -/* End of DORMQR */ +/* End of SORMQR */ -} /* dormqr_ */ +} /* sormqr_ */ -/* Subroutine */ int dormtr_(char *side, char *uplo, char *trans, integer *m, - 
integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal * - c__, integer *ldc, doublereal *work, integer *lwork, integer *info) +/* Subroutine */ int sormtr_(char *side, char *uplo, char *trans, integer *m, + integer *n, real *a, integer *lda, real *tau, real *c__, integer *ldc, + real *work, integer *lwork, integer *info) { /* System generated locals */ address a__1[2]; @@ -30878,14 +91132,14 @@ L110: extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int dormql_(char *, char *, integer *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *), - dormqr_(char *, char *, integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *, integer *); + extern /* Subroutine */ int sormql_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); static integer lwkopt; static logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); /* @@ -30898,7 +91152,7 @@ L110: Purpose ======= - DORMTR overwrites the general real M-by-N matrix C with + SORMTR overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q @@ -30906,7 +91160,7 @@ L110: where Q is a real orthogonal matrix of order nq, with nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of - nq-1 elementary reflectors, as returned by DSYTRD: + nq-1 elementary reflectors, as returned by SSYTRD: if UPLO = 'U', Q = H(nq-1) . . . 
H(2) H(1); @@ -30921,9 +91175,9 @@ L110: UPLO (input) CHARACTER*1 = 'U': Upper triangle of A contains elementary reflectors - from DSYTRD; + from SSYTRD; = 'L': Lower triangle of A contains elementary reflectors - from DSYTRD. + from SSYTRD. TRANS (input) CHARACTER*1 = 'N': No transpose, apply Q; @@ -30935,30 +91189,30 @@ L110: N (input) INTEGER The number of columns of the matrix C. N >= 0. - A (input) DOUBLE PRECISION array, dimension + A (input) REAL array, dimension (LDA,M) if SIDE = 'L' (LDA,N) if SIDE = 'R' The vectors which define the elementary reflectors, as - returned by DSYTRD. + returned by SSYTRD. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. - TAU (input) DOUBLE PRECISION array, dimension + TAU (input) REAL array, dimension (M-1) if SIDE = 'L' (N-1) if SIDE = 'R' TAU(i) must contain the scalar factor of the elementary - reflector H(i), as returned by DSYTRD. + reflector H(i), as returned by SSYTRD. - C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + C (input/output) REAL array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -31037,7 +91291,7 @@ L110: s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); i__2 = *m - 1; i__3 = *m - 1; - nb = ilaenv_(&c__1, "DORMQL", ch__1, &i__2, n, &i__3, &c_n1, ( + nb = ilaenv_(&c__1, "SORMQL", ch__1, &i__2, n, &i__3, &c_n1, ( ftnlen)6, (ftnlen)2); } else { /* Writing concatenation */ @@ -31046,7 +91300,7 @@ L110: s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); i__2 = *n - 1; i__3 = *n - 1; - nb = ilaenv_(&c__1, "DORMQL", ch__1, m, &i__2, &i__3, &c_n1, ( + nb = ilaenv_(&c__1, "SORMQL", ch__1, m, &i__2, &i__3, &c_n1, ( ftnlen)6, (ftnlen)2); } } else { @@ -31057,7 +91311,7 @@ L110: s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); i__2 = *m - 1; i__3 = *m - 1; - nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__2, n, &i__3, &c_n1, ( + nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__2, n, &i__3, &c_n1, ( ftnlen)6, (ftnlen)2); } else { /* Writing concatenation */ @@ -31066,17 +91320,17 @@ L110: s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); i__2 = *n - 1; i__3 = *n - 1; - nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__2, &i__3, &c_n1, ( + nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__2, &i__3, &c_n1, ( ftnlen)6, (ftnlen)2); } } lwkopt = max(1,nw) * nb; - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; } if (*info != 0) { i__2 = -(*info); - xerbla_("DORMTR", &i__2); + xerbla_("SORMTR", &i__2); return 0; } else if (lquery) { return 0; @@ -31085,7 +91339,7 @@ L110: /* Quick return if possible */ if (*m == 0 || *n == 0 || nq == 1) { - work[1] = 1.; + work[1] = 1.f; return 0; } @@ -31099,14 +91353,14 @@ L110: if (upper) { -/* Q was determined by a call to DSYTRD with UPLO = 'U' */ +/* Q was determined by a call to SSYTRD with UPLO = 'U' */ i__2 = nq - 1; - dormql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & + sormql_(side, trans, &mi, &ni, &i__2, &a[(a_dim1 << 1) + 1], lda, & tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo); } else { -/* Q was determined by a call to DSYTRD with UPLO = 'L' */ +/* Q was determined by a call to SSYTRD with UPLO = 'L' 
*/ if (left) { i1 = 2; @@ -31116,37 +91370,34 @@ L110: i2 = 2; } i__2 = nq - 1; - dormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & + sormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], & c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); } - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; return 0; -/* End of DORMTR */ +/* End of SORMTR */ -} /* dormtr_ */ +} /* sormtr_ */ -/* Subroutine */ int dpotf2_(char *uplo, integer *n, doublereal *a, integer * - lda, integer *info) +/* Subroutine */ int spotf2_(char *uplo, integer *n, real *a, integer *lda, + integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1; + real r__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer j; - static doublereal ajj; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); + static real ajj; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *); static logical upper; extern /* Subroutine */ int xerbla_(char *, integer *); @@ -31161,7 +91412,7 @@ L110: Purpose ======= - DPOTF2 computes the Cholesky factorization of a real symmetric + SPOTF2 computes the Cholesky factorization of a real symmetric positive definite matrix A. The factorization has the form @@ -31183,7 +91434,7 @@ L110: N (input) INTEGER The order of the matrix A. N >= 0. 
- A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading n by n upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower @@ -31228,7 +91479,7 @@ L110: } if (*info != 0) { i__1 = -(*info); - xerbla_("DPOTF2", &i__1); + xerbla_("SPOTF2", &i__1); return 0; } @@ -31248,9 +91499,9 @@ L110: /* Compute U(J,J) and test for non-positive-definiteness. */ i__2 = j - 1; - ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j * a_dim1 + 1], &c__1, + ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1], &c__1); - if (ajj <= 0.) { + if (ajj <= 0.f) { a[j + j * a_dim1] = ajj; goto L30; } @@ -31262,12 +91513,12 @@ L110: if (j < *n) { i__2 = j - 1; i__3 = *n - j; - dgemv_("Transpose", &i__2, &i__3, &c_b151, &a[(j + 1) * - a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b15, & - a[j + (j + 1) * a_dim1], lda); + sgemv_("Transpose", &i__2, &i__3, &c_b1150, &a[(j + 1) * + a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b871, + &a[j + (j + 1) * a_dim1], lda); i__2 = *n - j; - d__1 = 1. / ajj; - dscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); + r__1 = 1.f / ajj; + sscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda); } /* L10: */ } @@ -31281,9 +91532,9 @@ L110: /* Compute L(J,J) and test for non-positive-definiteness. */ i__2 = j - 1; - ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j + a_dim1], lda, &a[j + ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j + a_dim1], lda, &a[j + a_dim1], lda); - if (ajj <= 0.) 
{ + if (ajj <= 0.f) { a[j + j * a_dim1] = ajj; goto L30; } @@ -31295,12 +91546,12 @@ L110: if (j < *n) { i__2 = *n - j; i__3 = j - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[j + 1 + - a_dim1], lda, &a[j + a_dim1], lda, &c_b15, &a[j + 1 + - j * a_dim1], &c__1); + sgemv_("No transpose", &i__2, &i__3, &c_b1150, &a[j + 1 + + a_dim1], lda, &a[j + a_dim1], lda, &c_b871, &a[j + 1 + + j * a_dim1], &c__1); i__2 = *n - j; - d__1 = 1. / ajj; - dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + r__1 = 1.f / ajj; + sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); } /* L20: */ } @@ -31313,31 +91564,29 @@ L30: L40: return 0; -/* End of DPOTF2 */ +/* End of SPOTF2 */ -} /* dpotf2_ */ +} /* spotf2_ */ -/* Subroutine */ int dpotrf_(char *uplo, integer *n, doublereal *a, integer * - lda, integer *info) +/* Subroutine */ int spotrf_(char *uplo, integer *n, real *a, integer *lda, + integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer j, jb, nb; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, - integer *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); static logical upper; - extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, doublereal *, - integer *), dpotf2_(char *, integer *, - doublereal *, integer *, integer *), xerbla_(char *, - integer *); + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, 
integer * + ), ssyrk_(char *, char *, integer + *, integer *, real *, real *, integer *, real *, real *, integer * + ), spotf2_(char *, integer *, real *, integer *, + integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); @@ -31352,7 +91601,7 @@ L40: Purpose ======= - DPOTRF computes the Cholesky factorization of a real symmetric + SPOTRF computes the Cholesky factorization of a real symmetric positive definite matrix A. The factorization has the form @@ -31372,7 +91621,7 @@ L40: N (input) INTEGER The order of the matrix A. N >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower @@ -31417,7 +91666,7 @@ L40: } if (*info != 0) { i__1 = -(*info); - xerbla_("DPOTRF", &i__1); + xerbla_("SPOTRF", &i__1); return 0; } @@ -31429,13 +91678,13 @@ L40: /* Determine the block size for this environment. */ - nb = ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + nb = ilaenv_(&c__1, "SPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); if (nb <= 1 || nb >= *n) { /* Use unblocked code. */ - dpotf2_(uplo, n, &a[a_offset], lda, info); + spotf2_(uplo, n, &a[a_offset], lda, info); } else { /* Use blocked code. 
*/ @@ -31457,9 +91706,9 @@ L40: i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); i__3 = j - 1; - dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b151, &a[j * - a_dim1 + 1], lda, &c_b15, &a[j + j * a_dim1], lda); - dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + ssyrk_("Upper", "Transpose", &jb, &i__3, &c_b1150, &a[j * + a_dim1 + 1], lda, &c_b871, &a[j + j * a_dim1], lda); + spotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { goto L30; } @@ -31469,13 +91718,13 @@ L40: i__3 = *n - j - jb + 1; i__4 = j - 1; - dgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & - c_b151, &a[j * a_dim1 + 1], lda, &a[(j + jb) * - a_dim1 + 1], lda, &c_b15, &a[j + (j + jb) * + sgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & + c_b1150, &a[j * a_dim1 + 1], lda, &a[(j + jb) * + a_dim1 + 1], lda, &c_b871, &a[j + (j + jb) * a_dim1], lda); i__3 = *n - j - jb + 1; - dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & - i__3, &c_b15, &a[j + j * a_dim1], lda, &a[j + (j + strsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & + i__3, &c_b871, &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda); } /* L10: */ @@ -31498,9 +91747,9 @@ L40: i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); i__3 = j - 1; - dsyrk_("Lower", "No transpose", &jb, &i__3, &c_b151, &a[j + - a_dim1], lda, &c_b15, &a[j + j * a_dim1], lda); - dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + ssyrk_("Lower", "No transpose", &jb, &i__3, &c_b1150, &a[j + + a_dim1], lda, &c_b871, &a[j + j * a_dim1], lda); + spotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { goto L30; } @@ -31510,12 +91759,12 @@ L40: i__3 = *n - j - jb + 1; i__4 = j - 1; - dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & - c_b151, &a[j + jb + a_dim1], lda, &a[j + a_dim1], - lda, &c_b15, &a[j + jb + j * a_dim1], lda); + sgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & + c_b1150, &a[j + jb + a_dim1], lda, &a[j + a_dim1], + lda, &c_b871, &a[j + jb + j * a_dim1], lda); i__3 = *n - 
j - jb + 1; - dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & - jb, &c_b15, &a[j + j * a_dim1], lda, &a[j + jb + + strsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & + jb, &c_b871, &a[j + j * a_dim1], lda, &a[j + jb + j * a_dim1], lda); } /* L20: */ @@ -31530,17 +91779,257 @@ L30: L40: return 0; -/* End of DPOTRF */ +/* End of SPOTRF */ -} /* dpotrf_ */ +} /* spotrf_ */ -/* Subroutine */ int dstedc_(char *compz, integer *n, doublereal *d__, - doublereal *e, doublereal *z__, integer *ldz, doublereal *work, - integer *lwork, integer *iwork, integer *liwork, integer *info) +/* Subroutine */ int spotri_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), slauum_( + char *, integer *, real *, integer *, integer *), strtri_( + char *, char *, integer *, real *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SPOTRI computes the inverse of a real symmetric positive definite + matrix A using the Cholesky factorization A = U**T*U or A = L*L**T + computed by SPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T, as computed by + SPOTRF. + On exit, the upper or lower triangle of the (symmetric) + inverse of A, overwriting the input factor U or L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the (i,i) element of the factor U or L is + zero, and the inverse could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + strtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + +/* Form inv(U)*inv(U)' or inv(L)'*inv(L). */ + + slauum_(uplo, n, &a[a_offset], lda, info); + + return 0; + +/* End of SPOTRI */ + +} /* spotri_ */ + +/* Subroutine */ int spotrs_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SPOTRS solves a system of linear equations A*X = B with a symmetric + positive definite matrix A using the Cholesky factorization + A = U**T*U or A = L*L**T computed by SPOTRF. 
+ + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) REAL array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**T*U or A = L*L**T, as computed by SPOTRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* + Solve A*X = B where A = U'*U. + + Solve U'*X = B, overwriting B with X. +*/ + + strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b871, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. 
*/ + + strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b871, + &a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. +*/ + + strsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b871, + &a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. */ + + strsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b871, &a[ + a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of SPOTRS */ + +} /* spotrs_ */ + +/* Subroutine */ int sstedc_(char *compz, integer *n, real *d__, real *e, + real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, + integer *liwork, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; - doublereal d__1, d__2; + real r__1, r__2; /* Builtin functions */ double log(doublereal); @@ -31549,39 +92038,36 @@ L40: /* Local variables */ static integer i__, j, k, m; - static doublereal p; + static real p; static integer ii, end, lgn; - static doublereal eps, tiny; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); + static real eps, tiny; extern logical lsame_(char *, char *); - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *); - static integer lwmin; - extern /* Subroutine */ int dlaed0_(integer *, integer *, integer *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, integer *, integer *); - static integer start; - - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlacpy_(char *, integer *, integer - *, doublereal *, integer *, doublereal *, integer *), - dlaset_(char *, integer *, integer *, doublereal *, doublereal *, - 
doublereal *, integer *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer lwmin, start; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *), slaed0_(integer *, integer *, integer *, real *, real + *, real *, integer *, real *, integer *, real *, integer *, + integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *); - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, - integer *), dlasrt_(char *, integer *, doublereal *, integer *); + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, + real *, integer *), slaset_(char *, integer *, integer *, + real *, real *, real *, integer *); static integer liwmin, icompz; - extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *); - static doublereal orgnrm; + static real orgnrm; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *), + slasrt_(char *, integer *, real *, integer *); static logical lquery; - static integer smlsiz, dtrtrw, storez; + static integer smlsiz; + extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *, + real *, integer *, real *, integer *); + static integer storez, strtrw; /* @@ -31594,10 +92080,10 @@ L40: Purpose ======= - DSTEDC computes all eigenvalues and, optionally, eigenvectors of a + SSTEDC computes 
all eigenvalues and, optionally, eigenvectors of a symmetric tridiagonal matrix using the divide and conquer method. The eigenvectors of a full or band real symmetric matrix can also be - found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this + found if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to tridiagonal form. This code makes very mild assumptions about floating point @@ -31605,7 +92091,7 @@ L40: add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See DLAED3 for details. + without guard digits, but we know of none. See SLAED3 for details. Arguments ========= @@ -31621,15 +92107,15 @@ L40: N (input) INTEGER The dimension of the symmetric tridiagonal matrix. N >= 0. - D (input/output) DOUBLE PRECISION array, dimension (N) + D (input/output) REAL array, dimension (N) On entry, the diagonal elements of the tridiagonal matrix. On exit, if INFO = 0, the eigenvalues in ascending order. - E (input/output) DOUBLE PRECISION array, dimension (N-1) + E (input/output) REAL array, dimension (N-1) On entry, the subdiagonal elements of the tridiagonal matrix. On exit, E has been destroyed. - Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + Z (input/output) REAL array, dimension (LDZ,N) On entry, if COMPZ = 'V', then Z contains the orthogonal matrix used in the reduction to tridiagonal form. On exit, if INFO = 0, then if COMPZ = 'V', Z contains the @@ -31642,7 +92128,7 @@ L40: The leading dimension of the array Z. LDZ >= 1. If eigenvectors are desired, then LDZ >= max(1,N). - WORK (workspace/output) DOUBLE PRECISION array, + WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
@@ -31724,7 +92210,7 @@ L40: liwmin = 1; lwmin = 1; } else { - lgn = (integer) (log((doublereal) (*n)) / log(2.)); + lgn = (integer) (log((real) (*n)) / log(2.f)); if (pow_ii(&c__2, &lgn) < *n) { ++lgn; } @@ -31756,13 +92242,13 @@ L40: } if (*info == 0) { - work[1] = (doublereal) lwmin; + work[1] = (real) lwmin; iwork[1] = liwmin; } if (*info != 0) { i__1 = -(*info); - xerbla_("DSTEDC", &i__1); + xerbla_("SSTEDC", &i__1); return 0; } else if (lquery) { return 0; @@ -31775,12 +92261,12 @@ L40: } if (*n == 1) { if (icompz != 0) { - z__[z_dim1 + 1] = 1.; + z__[z_dim1 + 1] = 1.f; } return 0; } - smlsiz = ilaenv_(&c__9, "DSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + smlsiz = ilaenv_(&c__9, "SSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( ftnlen)6, (ftnlen)1); /* @@ -31788,15 +92274,15 @@ L40: will use the Divide and Conquer routine to compute only the eigenvalues, which requires (3N + 3N**2) real workspace and (2 + 5N + 2N lg(N)) integer workspace. - Since on many architectures DSTERF is much faster than any other + Since on many architectures SSTERF is much faster than any other algorithm for finding eigenvalues only, it is used here as the default. - If COMPZ = 'N', use DSTERF to compute the eigenvalues. + If COMPZ = 'N', use SSTERF to compute the eigenvalues. 
*/ if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); + ssterf_(n, &d__[1], &e[1], info); return 0; } @@ -31807,14 +92293,14 @@ L40: if (*n <= smlsiz) { if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); + ssterf_(n, &d__[1], &e[1], info); return 0; } else if (icompz == 2) { - dsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], + ssteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); return 0; } else { - dsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], + ssteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); return 0; } @@ -31832,17 +92318,17 @@ L40: } if (icompz == 2) { - dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + slaset_("Full", n, n, &c_b1101, &c_b871, &z__[z_offset], ldz); } /* Scale. */ - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { + orgnrm = slanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.f) { return 0; } - eps = EPSILON; + eps = slamch_("Epsilon"); start = 1; @@ -31861,9 +92347,9 @@ L10: end = start; L20: if (end < *n) { - tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = - d__[end + 1], abs(d__2))); - if ((d__1 = e[end], abs(d__1)) > tiny) { + tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 = + d__[end + 1], dabs(r__2))); + if ((r__1 = e[end], dabs(r__1)) > tiny) { ++end; goto L20; } @@ -31881,20 +92367,20 @@ L20: /* Scale. 
*/ - orgnrm = dlanst_("M", &m, &d__[start], &e[start]); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &m, &c__1, &d__[start] - , &m, info); + orgnrm = slanst_("M", &m, &d__[start], &e[start]); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &m, &c__1, &d__[ + start], &m, info); i__1 = m - 1; i__2 = m - 1; - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &i__1, &c__1, &e[ + slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &i__1, &c__1, &e[ start], &i__2, info); if (icompz == 1) { - dtrtrw = 1; + strtrw = 1; } else { - dtrtrw = start; + strtrw = start; } - dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[dtrtrw + + slaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw + start * z_dim1], ldz, &work[1], n, &work[storez], &iwork[ 1], info); if (*info != 0) { @@ -31905,8 +92391,8 @@ L20: /* Scale back. */ - dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &m, &c__1, &d__[start] - , &m, info); + slascl_("G", &c__0, &c__0, &c_b871, &orgnrm, &m, &c__1, &d__[ + start], &m, info); } else { if (icompz == 1) { @@ -31917,17 +92403,17 @@ L20: then multiply back into Z. 
*/ - dsteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ + ssteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ m * m + 1], info); - dlacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ + slacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ storez], n); - dgemm_("N", "N", n, &m, &m, &c_b15, &work[storez], ldz, &work[ - 1], &m, &c_b29, &z__[start * z_dim1 + 1], ldz); + sgemm_("N", "N", n, &m, &m, &c_b871, &work[storez], ldz, & + work[1], &m, &c_b1101, &z__[start * z_dim1 + 1], ldz); } else if (icompz == 2) { - dsteqr_("I", &m, &d__[start], &e[start], &z__[start + start * + ssteqr_("I", &m, &d__[start], &e[start], &z__[start + start * z_dim1], ldz, &work[1], info); } else { - dsterf_(&m, &d__[start], &e[start], info); + ssterf_(&m, &d__[start], &e[start], info); } if (*info != 0) { *info = start * (*n + 1) + end; @@ -31952,7 +92438,7 @@ L20: /* Use Quick Sort */ - dlasrt_("I", n, &d__[1], info); + slasrt_("I", n, &d__[1], info); } else { @@ -31974,7 +92460,7 @@ L20: if (k != i__) { d__[k] = d__[i__]; d__[i__] = p; - dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], &c__1); } /* L40: */ @@ -31982,64 +92468,60 @@ L20: } } - work[1] = (doublereal) lwmin; + work[1] = (real) lwmin; iwork[1] = liwmin; return 0; -/* End of DSTEDC */ +/* End of SSTEDC */ -} /* dstedc_ */ +} /* sstedc_ */ -/* Subroutine */ int dsteqr_(char *compz, integer *n, doublereal *d__, - doublereal *e, doublereal *z__, integer *ldz, doublereal *work, - integer *info) +/* Subroutine */ int ssteqr_(char *compz, integer *n, real *d__, real *e, + real *z__, integer *ldz, real *work, integer *info) { /* System generated locals */ integer z_dim1, z_offset, i__1, i__2; - doublereal d__1, d__2; + real r__1, r__2; /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); + double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ - static doublereal b, c__, f, g; + 
static real b, c__, f, g; static integer i__, j, k, l, m; - static doublereal p, r__, s; + static real p, r__, s; static integer l1, ii, mm, lm1, mm1, nm1; - static doublereal rt1, rt2, eps; + static real rt1, rt2, eps; static integer lsv; - static doublereal tst, eps2; + static real tst, eps2; static integer lend, jtot; - extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal - *, doublereal *, doublereal *); + extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) + ; extern logical lsame_(char *, char *); - extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublereal *, integer *); - static doublereal anorm; - extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, - doublereal *, integer *), dlaev2_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *); + static real anorm; + extern /* Subroutine */ int slasr_(char *, char *, char *, integer *, + integer *, real *, real *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer *); static integer lendm1, lendp1; - + extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real * + , real *, real *); + extern doublereal slapy2_(real *, real *); static integer iscale; - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dlaset_(char *, integer *, integer - *, doublereal *, doublereal *, doublereal *, integer *); - static doublereal safmin; - extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *); - static doublereal safmax; + extern doublereal slamch_(char *); + static real safmin; extern /* Subroutine */ int xerbla_(char *, integer *); - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dlasrt_(char *, integer *, 
doublereal *, - integer *); + static real safmax; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); static integer lendsv; - static doublereal ssfmin; + extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real * + ), slaset_(char *, integer *, integer *, real *, real *, real *, + integer *); + static real ssfmin; static integer nmaxit, icompz; - static doublereal ssfmax; + static real ssfmax; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); /* @@ -32052,10 +92534,10 @@ L20: Purpose ======= - DSTEQR computes all eigenvalues and, optionally, eigenvectors of a + SSTEQR computes all eigenvalues and, optionally, eigenvectors of a symmetric tridiagonal matrix using the implicit QL or QR method. The eigenvectors of a full or band symmetric matrix can also be found - if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to + if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to tridiagonal form. Arguments @@ -32074,16 +92556,16 @@ L20: N (input) INTEGER The order of the matrix. N >= 0. - D (input/output) DOUBLE PRECISION array, dimension (N) + D (input/output) REAL array, dimension (N) On entry, the diagonal elements of the tridiagonal matrix. On exit, if INFO = 0, the eigenvalues in ascending order. - E (input/output) DOUBLE PRECISION array, dimension (N-1) + E (input/output) REAL array, dimension (N-1) On entry, the (n-1) subdiagonal elements of the tridiagonal matrix. On exit, E has been destroyed. - Z (input/output) DOUBLE PRECISION array, dimension (LDZ, N) + Z (input/output) REAL array, dimension (LDZ, N) On entry, if COMPZ = 'V', then Z contains the orthogonal matrix used in the reduction to tridiagonal form. On exit, if INFO = 0, then if COMPZ = 'V', Z contains the @@ -32096,7 +92578,7 @@ L20: The leading dimension of the array Z. 
LDZ >= 1, and if eigenvectors are desired, then LDZ >= max(1,N). - WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) + WORK (workspace) REAL array, dimension (max(1,2*N-2)) If COMPZ = 'N', then WORK is not referenced. INFO (output) INTEGER @@ -32144,7 +92626,7 @@ L20: } if (*info != 0) { i__1 = -(*info); - xerbla_("DSTEQR", &i__1); + xerbla_("SSTEQR", &i__1); return 0; } @@ -32156,20 +92638,20 @@ L20: if (*n == 1) { if (icompz == 2) { - z__[z_dim1 + 1] = 1.; + z__[z_dim1 + 1] = 1.f; } return 0; } /* Determine the unit roundoff and over/underflow thresholds. */ - eps = EPSILON; + eps = slamch_("E"); /* Computing 2nd power */ - d__1 = eps; - eps2 = d__1 * d__1; - safmin = SAFEMINIMUM; - safmax = 1. / safmin; - ssfmax = sqrt(safmax) / 3.; + r__1 = eps; + eps2 = r__1 * r__1; + safmin = slamch_("S"); + safmax = 1.f / safmin; + ssfmax = sqrt(safmax) / 3.f; ssfmin = sqrt(safmin) / eps2; /* @@ -32178,7 +92660,7 @@ L20: */ if (icompz == 2) { - dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + slaset_("Full", n, n, &c_b1101, &c_b871, &z__[z_offset], ldz); } nmaxit = *n * 30; @@ -32198,18 +92680,18 @@ L10: goto L160; } if (l1 > 1) { - e[l1 - 1] = 0.; + e[l1 - 1] = 0.f; } if (l1 <= nm1) { i__1 = nm1; for (m = l1; m <= i__1; ++m) { - tst = (d__1 = e[m], abs(d__1)); - if (tst == 0.) { + tst = (r__1 = e[m], dabs(r__1)); + if (tst == 0.f) { goto L30; } - if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m - + 1], abs(d__2))) * eps) { - e[m] = 0.; + if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m + + 1], dabs(r__2))) * eps) { + e[m] = 0.f; goto L30; } /* L20: */ @@ -32230,32 +92712,32 @@ L30: /* Scale submatrix in rows and columns L to LEND */ i__1 = lend - l + 1; - anorm = dlanst_("I", &i__1, &d__[l], &e[l]); + anorm = slanst_("I", &i__1, &d__[l], &e[l]); iscale = 0; - if (anorm == 0.) 
{ + if (anorm == 0.f) { goto L10; } if (anorm > ssfmax) { iscale = 1; i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, info); i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, info); } else if (anorm < ssfmin) { iscale = 2; i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, info); i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, info); } /* Choose between QL and QR iteration */ - if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { lend = lsv; l = lendsv; } @@ -32274,10 +92756,10 @@ L40: i__1 = lendm1; for (m = l; m <= i__1; ++m) { /* Computing 2nd power */ - d__2 = (d__1 = e[m], abs(d__1)); - tst = d__2 * d__2; - if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m - + 1], abs(d__2)) + safmin) { + r__2 = (r__1 = e[m], dabs(r__1)); + tst = r__2 * r__2; + if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m + + 1], dabs(r__2)) + safmin) { goto L60; } /* L50: */ @@ -32288,7 +92770,7 @@ L40: L60: if (m < lend) { - e[m] = 0.; + e[m] = 0.f; } p = d__[l]; if (m == l) { @@ -32296,23 +92778,23 @@ L60: } /* - If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 to compute its eigensystem. 
*/ if (m == l + 1) { if (icompz > 0) { - dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); work[l] = c__; work[*n - 1 + l] = s; - dlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + slasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & z__[l * z_dim1 + 1], ldz); } else { - dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); } d__[l] = rt1; d__[l + 1] = rt2; - e[l] = 0.; + e[l] = 0.f; l += 2; if (l <= lend) { goto L40; @@ -32327,13 +92809,13 @@ L60: /* Form shift. */ - g = (d__[l + 1] - p) / (e[l] * 2.); - r__ = dlapy2_(&g, &c_b15); - g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); + g = (d__[l + 1] - p) / (e[l] * 2.f); + r__ = slapy2_(&g, &c_b871); + g = d__[m] - p + e[l] / (g + r_sign(&r__, &g)); - s = 1.; - c__ = 1.; - p = 0.; + s = 1.f; + c__ = 1.f; + p = 0.f; /* Inner loop */ @@ -32342,12 +92824,12 @@ L60: for (i__ = mm1; i__ >= i__1; --i__) { f = s * e[i__]; b = c__ * e[i__]; - dlartg_(&g, &f, &c__, &s, &r__); + slartg_(&g, &f, &c__, &s, &r__); if (i__ != m - 1) { e[i__ + 1] = r__; } g = d__[i__ + 1] - p; - r__ = (d__[i__] - g) * s + c__ * 2. 
* b; + r__ = (d__[i__] - g) * s + c__ * 2.f * b; p = s * r__; d__[i__ + 1] = g + p; g = c__ * r__ - b; @@ -32366,7 +92848,7 @@ L60: if (icompz > 0) { mm = m - l + 1; - dlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + slasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l * z_dim1 + 1], ldz); } @@ -32399,10 +92881,10 @@ L90: i__1 = lendp1; for (m = l; m >= i__1; --m) { /* Computing 2nd power */ - d__2 = (d__1 = e[m - 1], abs(d__1)); - tst = d__2 * d__2; - if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m - - 1], abs(d__2)) + safmin) { + r__2 = (r__1 = e[m - 1], dabs(r__1)); + tst = r__2 * r__2; + if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m + - 1], dabs(r__2)) + safmin) { goto L110; } /* L100: */ @@ -32413,7 +92895,7 @@ L90: L110: if (m > lend) { - e[m - 1] = 0.; + e[m - 1] = 0.f; } p = d__[l]; if (m == l) { @@ -32421,24 +92903,24 @@ L110: } /* - If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 to compute its eigensystem. */ if (m == l - 1) { if (icompz > 0) { - dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) ; work[m] = c__; work[*n - 1 + m] = s; - dlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + slasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & z__[(l - 1) * z_dim1 + 1], ldz); } else { - dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); } d__[l - 1] = rt1; d__[l] = rt2; - e[l - 1] = 0.; + e[l - 1] = 0.f; l += -2; if (l >= lend) { goto L90; @@ -32453,13 +92935,13 @@ L110: /* Form shift. 
*/ - g = (d__[l - 1] - p) / (e[l - 1] * 2.); - r__ = dlapy2_(&g, &c_b15); - g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); + g = (d__[l - 1] - p) / (e[l - 1] * 2.f); + r__ = slapy2_(&g, &c_b871); + g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g)); - s = 1.; - c__ = 1.; - p = 0.; + s = 1.f; + c__ = 1.f; + p = 0.f; /* Inner loop */ @@ -32468,12 +92950,12 @@ L110: for (i__ = m; i__ <= i__1; ++i__) { f = s * e[i__]; b = c__ * e[i__]; - dlartg_(&g, &f, &c__, &s, &r__); + slartg_(&g, &f, &c__, &s, &r__); if (i__ != m) { e[i__ - 1] = r__; } g = d__[i__] - p; - r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; + r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b; p = s * r__; d__[i__] = g + p; g = c__ * r__ - b; @@ -32492,7 +92974,7 @@ L110: if (icompz > 0) { mm = l - m + 1; - dlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m + slasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m * z_dim1 + 1], ldz); } @@ -32518,17 +93000,17 @@ L130: L140: if (iscale == 1) { i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], n, info); i__1 = lendsv - lsv; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, info); } else if (iscale == 2) { i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], n, info); i__1 = lendsv - lsv; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, info); } @@ -32542,7 +93024,7 @@ L140: } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.) 
{ + if (e[i__] != 0.f) { ++(*info); } /* L150: */ @@ -32556,7 +93038,7 @@ L160: /* Use Quick Sort */ - dlasrt_("I", n, &d__[1], info); + slasrt_("I", n, &d__[1], info); } else { @@ -32578,7 +93060,7 @@ L160: if (k != i__) { d__[k] = d__[i__]; d__[i__] = p; - dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], &c__1); } /* L180: */ @@ -32588,47 +93070,46 @@ L160: L190: return 0; -/* End of DSTEQR */ +/* End of SSTEQR */ -} /* dsteqr_ */ +} /* ssteqr_ */ -/* Subroutine */ int dsterf_(integer *n, doublereal *d__, doublereal *e, - integer *info) +/* Subroutine */ int ssterf_(integer *n, real *d__, real *e, integer *info) { /* System generated locals */ integer i__1; - doublereal d__1, d__2, d__3; + real r__1, r__2, r__3; /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); + double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ - static doublereal c__; + static real c__; static integer i__, l, m; - static doublereal p, r__, s; + static real p, r__, s; static integer l1; - static doublereal bb, rt1, rt2, eps, rte; + static real bb, rt1, rt2, eps, rte; static integer lsv; - static doublereal eps2, oldc; + static real eps2, oldc; static integer lend, jtot; - extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal - *, doublereal *, doublereal *); - static doublereal gamma, alpha, sigma, anorm; - + extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *) + ; + static real gamma, alpha, sigma, anorm; + extern doublereal slapy2_(real *, real *); static integer iscale; - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *); - static doublereal oldgam, safmin; + static real oldgam; + extern doublereal slamch_(char *); + static real safmin; extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal 
safmax; - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); + static real safmax; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); static integer lendsv; - static doublereal ssfmin; + static real ssfmin; static integer nmaxit; - static doublereal ssfmax; + static real ssfmax; + extern doublereal slanst_(char *, integer *, real *, real *); + extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); /* @@ -32641,7 +93122,7 @@ L190: Purpose ======= - DSTERF computes all eigenvalues of a symmetric tridiagonal matrix + SSTERF computes all eigenvalues of a symmetric tridiagonal matrix using the Pal-Walker-Kahan variant of the QL or QR algorithm. Arguments @@ -32650,11 +93131,11 @@ L190: N (input) INTEGER The order of the matrix. N >= 0. - D (input/output) DOUBLE PRECISION array, dimension (N) + D (input/output) REAL array, dimension (N) On entry, the n diagonal elements of the tridiagonal matrix. On exit, if INFO = 0, the eigenvalues in ascending order. - E (input/output) DOUBLE PRECISION array, dimension (N-1) + E (input/output) REAL array, dimension (N-1) On entry, the (n-1) subdiagonal elements of the tridiagonal matrix. On exit, E has been destroyed. @@ -32684,7 +93165,7 @@ L190: if (*n < 0) { *info = -1; i__1 = -(*info); - xerbla_("DSTERF", &i__1); + xerbla_("SSTERF", &i__1); return 0; } if (*n <= 1) { @@ -32693,19 +93174,19 @@ L190: /* Determine the unit roundoff for this environment. */ - eps = EPSILON; + eps = slamch_("E"); /* Computing 2nd power */ - d__1 = eps; - eps2 = d__1 * d__1; - safmin = SAFEMINIMUM; - safmax = 1. 
/ safmin; - ssfmax = sqrt(safmax) / 3.; + r__1 = eps; + eps2 = r__1 * r__1; + safmin = slamch_("S"); + safmax = 1.f / safmin; + ssfmax = sqrt(safmax) / 3.f; ssfmin = sqrt(safmin) / eps2; /* Compute the eigenvalues of the tridiagonal matrix. */ nmaxit = *n * 30; - sigma = 0.; + sigma = 0.f; jtot = 0; /* @@ -32721,13 +93202,13 @@ L10: goto L170; } if (l1 > 1) { - e[l1 - 1] = 0.; + e[l1 - 1] = 0.f; } i__1 = *n - 1; for (m = l1; m <= i__1; ++m) { - if ((d__3 = e[m], abs(d__3)) <= sqrt((d__1 = d__[m], abs(d__1))) * - sqrt((d__2 = d__[m + 1], abs(d__2))) * eps) { - e[m] = 0.; + if ((r__3 = e[m], dabs(r__3)) <= sqrt((r__1 = d__[m], dabs(r__1))) * + sqrt((r__2 = d__[m + 1], dabs(r__2))) * eps) { + e[m] = 0.f; goto L30; } /* L20: */ @@ -32747,37 +93228,37 @@ L30: /* Scale submatrix in rows and columns L to LEND */ i__1 = lend - l + 1; - anorm = dlanst_("I", &i__1, &d__[l], &e[l]); + anorm = slanst_("I", &i__1, &d__[l], &e[l]); iscale = 0; if (anorm > ssfmax) { iscale = 1; i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, info); i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, info); } else if (anorm < ssfmin) { iscale = 2; i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, info); i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, info); } i__1 = lend - 1; for (i__ = l; i__ <= i__1; ++i__) { /* Computing 2nd power */ - d__1 = e[i__]; - e[i__] = d__1 * d__1; + r__1 = e[i__]; + e[i__] = r__1 * r__1; /* L40: */ } /* Choose between QL and QR iteration */ - if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + if ((r__1 = 
d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) { lend = lsv; l = lendsv; } @@ -32794,8 +93275,8 @@ L50: if (l != lend) { i__1 = lend - 1; for (m = l; m <= i__1; ++m) { - if ((d__2 = e[m], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m - + 1], abs(d__1))) { + if ((r__2 = e[m], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[ + m + 1], dabs(r__1))) { goto L70; } /* L60: */ @@ -32805,7 +93286,7 @@ L50: L70: if (m < lend) { - e[m] = 0.; + e[m] = 0.f; } p = d__[l]; if (m == l) { @@ -32813,16 +93294,16 @@ L70: } /* - If remaining matrix is 2 by 2, use DLAE2 to compute its + If remaining matrix is 2 by 2, use SLAE2 to compute its eigenvalues. */ if (m == l + 1) { rte = sqrt(e[l]); - dlae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); + slae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2); d__[l] = rt1; d__[l + 1] = rt2; - e[l] = 0.; + e[l] = 0.f; l += 2; if (l <= lend) { goto L50; @@ -32838,12 +93319,12 @@ L70: /* Form shift. */ rte = sqrt(e[l]); - sigma = (d__[l + 1] - p) / (rte * 2.); - r__ = dlapy2_(&sigma, &c_b15); - sigma = p - rte / (sigma + d_sign(&r__, &sigma)); + sigma = (d__[l + 1] - p) / (rte * 2.f); + r__ = slapy2_(&sigma, &c_b871); + sigma = p - rte / (sigma + r_sign(&r__, &sigma)); - c__ = 1.; - s = 0.; + c__ = 1.f; + s = 0.f; gamma = d__[m] - sigma; p = gamma * gamma; @@ -32863,7 +93344,7 @@ L70: alpha = d__[i__]; gamma = c__ * (alpha - sigma) - s * oldgam; d__[i__ + 1] = oldgam + (alpha - gamma); - if (c__ != 0.) 
{ + if (c__ != 0.f) { p = gamma * gamma / c__; } else { p = oldc * bb; @@ -32897,8 +93378,8 @@ L90: L100: i__1 = lend + 1; for (m = l; m >= i__1; --m) { - if ((d__2 = e[m - 1], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m - - 1], abs(d__1))) { + if ((r__2 = e[m - 1], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[ + m - 1], dabs(r__1))) { goto L120; } /* L110: */ @@ -32907,7 +93388,7 @@ L100: L120: if (m > lend) { - e[m - 1] = 0.; + e[m - 1] = 0.f; } p = d__[l]; if (m == l) { @@ -32915,16 +93396,16 @@ L120: } /* - If remaining matrix is 2 by 2, use DLAE2 to compute its + If remaining matrix is 2 by 2, use SLAE2 to compute its eigenvalues. */ if (m == l - 1) { rte = sqrt(e[l - 1]); - dlae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); + slae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2); d__[l] = rt1; d__[l - 1] = rt2; - e[l - 1] = 0.; + e[l - 1] = 0.f; l += -2; if (l >= lend) { goto L100; @@ -32940,12 +93421,12 @@ L120: /* Form shift. */ rte = sqrt(e[l - 1]); - sigma = (d__[l - 1] - p) / (rte * 2.); - r__ = dlapy2_(&sigma, &c_b15); - sigma = p - rte / (sigma + d_sign(&r__, &sigma)); + sigma = (d__[l - 1] - p) / (rte * 2.f); + r__ = slapy2_(&sigma, &c_b871); + sigma = p - rte / (sigma + r_sign(&r__, &sigma)); - c__ = 1.; - s = 0.; + c__ = 1.f; + s = 0.f; gamma = d__[m] - sigma; p = gamma * gamma; @@ -32965,7 +93446,7 @@ L120: alpha = d__[i__ + 1]; gamma = c__ * (alpha - sigma) - s * oldgam; d__[i__] = oldgam + (alpha - gamma); - if (c__ != 0.) 
{ + if (c__ != 0.f) { p = gamma * gamma / c__; } else { p = oldc * bb; @@ -32995,12 +93476,12 @@ L140: L150: if (iscale == 1) { i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], n, info); } if (iscale == 2) { i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], n, info); } @@ -33014,7 +93495,7 @@ L150: } i__1 = *n - 1; for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.) { + if (e[i__] != 0.f) { ++(*info); } /* L160: */ @@ -33024,64 +93505,61 @@ L150: /* Sort eigenvalues in increasing order. */ L170: - dlasrt_("I", n, &d__[1], info); + slasrt_("I", n, &d__[1], info); L180: return 0; -/* End of DSTERF */ +/* End of SSTERF */ -} /* dsterf_ */ +} /* ssterf_ */ -/* Subroutine */ int dsyevd_(char *jobz, char *uplo, integer *n, doublereal * - a, integer *lda, doublereal *w, doublereal *work, integer *lwork, - integer *iwork, integer *liwork, integer *info) +/* Subroutine */ int ssyevd_(char *jobz, char *uplo, integer *n, real *a, + integer *lda, real *w, real *work, integer *lwork, integer *iwork, + integer *liwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; - doublereal d__1; + real r__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ - static doublereal eps; + static real eps; static integer inde; - static doublereal anrm, rmin, rmax; + static real anrm, rmin, rmax; static integer lopt; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - static doublereal sigma; + static real sigma; extern logical lsame_(char *, char *); - static integer iinfo, lwmin, liopt; + static integer iinfo; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static integer lwmin, liopt; static logical lower, wantz; - static 
integer indwk2, llwrk2; - - static integer iscale; - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dstedc_(char *, integer *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, integer *), dlacpy_( - char *, integer *, integer *, doublereal *, integer *, doublereal - *, integer *); - static doublereal safmin; + static integer indwk2, llwrk2, iscale; + extern doublereal slamch_(char *); + static real safmin; extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal bignum; + static real bignum; + extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, + real *, integer *, integer *, real *, integer *, integer *); static integer indtau; - extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, - integer *); - extern doublereal dlansy_(char *, char *, integer *, doublereal *, - integer *, doublereal *); + extern /* Subroutine */ int sstedc_(char *, integer *, real *, real *, + real *, integer *, real *, integer *, integer *, integer *, + integer *), slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *); static integer indwrk, liwmin; - extern /* Subroutine */ int dormtr_(char *, char *, char *, integer *, - integer *, doublereal *, integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *, integer *), dsytrd_(char *, integer *, doublereal *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *, - integer *); + extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *); + extern doublereal slansy_(char *, char *, integer *, real *, integer *, + real *); static integer llwork; - static doublereal smlnum; + static real smlnum; static logical lquery; + extern /* Subroutine */ int sormtr_(char *, char *, char *, integer *, + integer *, real *, integer *, real *, real *, 
integer *, real *, + integer *, integer *), ssytrd_(char *, + integer *, real *, integer *, real *, real *, real *, real *, + integer *, integer *); /* @@ -33094,7 +93572,7 @@ L180: Purpose ======= - DSYEVD computes all eigenvalues and, optionally, eigenvectors of a + SSYEVD computes all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. If eigenvectors are desired, it uses a divide and conquer algorithm. @@ -33105,8 +93583,8 @@ L180: Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. - Because of large use of BLAS of level 3, DSYEVD needs N**2 more - workspace than DSYEVX. + Because of large use of BLAS of level 3, SSYEVD needs N**2 more + workspace than SSYEVX. Arguments ========= @@ -33122,7 +93600,7 @@ L180: N (input) INTEGER The order of the matrix A. N >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA, N) + A (input/output) REAL array, dimension (LDA, N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If UPLO = 'L', @@ -33137,10 +93615,10 @@ L180: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). - W (output) DOUBLE PRECISION array, dimension (N) + W (output) REAL array, dimension (N) If INFO = 0, the eigenvalues in ascending order. - WORK (workspace/output) DOUBLE PRECISION array, + WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. @@ -33238,13 +93716,13 @@ L180: } if (*info == 0) { - work[1] = (doublereal) lopt; + work[1] = (real) lopt; iwork[1] = liopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("DSYEVD", &i__1); + xerbla_("SSYEVD", &i__1); return 0; } else if (lquery) { return 0; @@ -33259,25 +93737,25 @@ L180: if (*n == 1) { w[1] = a[a_dim1 + 1]; if (wantz) { - a[a_dim1 + 1] = 1.; + a[a_dim1 + 1] = 1.f; } return 0; } /* Get machine constants. 
*/ - safmin = SAFEMINIMUM; - eps = PRECISION; + safmin = slamch_("Safe minimum"); + eps = slamch_("Precision"); smlnum = safmin / eps; - bignum = 1. / smlnum; + bignum = 1.f / smlnum; rmin = sqrt(smlnum); rmax = sqrt(bignum); /* Scale matrix to allowable range, if necessary. */ - anrm = dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]); + anrm = slansy_("M", uplo, n, &a[a_offset], lda, &work[1]); iscale = 0; - if (anrm > 0. && anrm < rmin) { + if (anrm > 0.f && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { @@ -33285,11 +93763,11 @@ L180: sigma = rmax / anrm; } if (iscale == 1) { - dlascl_(uplo, &c__0, &c__0, &c_b15, &sigma, n, n, &a[a_offset], lda, + slascl_(uplo, &c__0, &c__0, &c_b871, &sigma, n, n, &a[a_offset], lda, info); } -/* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ +/* Call SSYTRD to reduce symmetric matrix to tridiagonal form. */ inde = 1; indtau = inde + *n; @@ -33298,25 +93776,25 @@ L180: indwk2 = indwrk + *n * *n; llwrk2 = *lwork - indwk2 + 1; - dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & + ssytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & work[indwrk], &llwork, &iinfo); - lopt = (integer) ((*n << 1) + work[indwrk]); + lopt = (*n << 1) + work[indwrk]; /* - For eigenvalues only, call DSTERF. For eigenvectors, first call - DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the - tridiagonal matrix, then call DORMTR to multiply it by the + For eigenvalues only, call SSTERF. For eigenvectors, first call + SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the + tridiagonal matrix, then call SORMTR to multiply it by the Householder transformations stored in A. */ if (! 
wantz) { - dsterf_(n, &w[1], &work[inde], info); + ssterf_(n, &w[1], &work[inde], info); } else { - dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & + sstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], & llwrk2, &iwork[1], liwork, info); - dormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ + sormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ indwrk], n, &work[indwk2], &llwrk2, &iinfo); - dlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); + slacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); /* Computing MAX Computing 2nd power @@ -33329,43 +93807,39 @@ L180: /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { - d__1 = 1. / sigma; - dscal_(n, &d__1, &w[1], &c__1); + r__1 = 1.f / sigma; + sscal_(n, &r__1, &w[1], &c__1); } - work[1] = (doublereal) lopt; + work[1] = (real) lopt; iwork[1] = liopt; return 0; -/* End of DSYEVD */ +/* End of SSYEVD */ -} /* dsyevd_ */ +} /* ssyevd_ */ -/* Subroutine */ int dsytd2_(char *uplo, integer *n, doublereal *a, integer * - lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info) +/* Subroutine */ int ssytd2_(char *uplo, integer *n, real *a, integer *lda, + real *d__, real *e, real *tau, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i__; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - static doublereal taui; - extern /* Subroutine */ int dsyr2_(char *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *); - static doublereal alpha; + static real taui; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + static real alpha; extern logical lsame_(char *, char *); - extern /* 
Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *); static logical upper; - extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dlarfg_(integer *, doublereal *, - doublereal *, integer *, doublereal *), xerbla_(char *, integer * - ); + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *), ssymv_(char *, integer *, real *, real *, + integer *, real *, integer *, real *, real *, integer *), + xerbla_(char *, integer *), slarfg_(integer *, real *, + real *, integer *, real *); /* @@ -33378,7 +93852,7 @@ L180: Purpose ======= - DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal + SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal form T by an orthogonal similarity transformation: Q' * A * Q = T. Arguments @@ -33393,7 +93867,7 @@ L180: N (input) INTEGER The order of the matrix A. N >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading n-by-n upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower @@ -33415,15 +93889,15 @@ L180: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). - D (output) DOUBLE PRECISION array, dimension (N) + D (output) REAL array, dimension (N) The diagonal elements of the tridiagonal matrix T: D(i) = A(i,i). - E (output) DOUBLE PRECISION array, dimension (N-1) + E (output) REAL array, dimension (N-1) The off-diagonal elements of the tridiagonal matrix T: E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. - TAU (output) DOUBLE PRECISION array, dimension (N-1) + TAU (output) REAL array, dimension (N-1) The scalar factors of the elementary reflectors (see Further Details). 
@@ -33500,7 +93974,7 @@ L180: } if (*info != 0) { i__1 = -(*info); - xerbla_("DSYTD2", &i__1); + xerbla_("SSYTD2", &i__1); return 0; } @@ -33521,27 +93995,26 @@ L180: to annihilate A(1:i-1,i+1) */ - dlarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 + slarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui); e[i__] = a[i__ + (i__ + 1) * a_dim1]; - if (taui != 0.) { + if (taui != 0.f) { /* Apply H(i) from both sides to A(1:i,1:i) */ - a[i__ + (i__ + 1) * a_dim1] = 1.; + a[i__ + (i__ + 1) * a_dim1] = 1.f; /* Compute x := tau * A * v storing x in TAU(1:i) */ - dsymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * - a_dim1 + 1], &c__1, &c_b29, &tau[1], &c__1) - ; + ssymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * + a_dim1 + 1], &c__1, &c_b1101, &tau[1], &c__1); /* Compute w := x - 1/2 * tau * (x'*v) * v */ - alpha = taui * -.5 * ddot_(&i__, &tau[1], &c__1, &a[(i__ + 1) - * a_dim1 + 1], &c__1); - daxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ + alpha = taui * -.5f * sdot_(&i__, &tau[1], &c__1, &a[(i__ + 1) + * a_dim1 + 1], &c__1); + saxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[ 1], &c__1); /* @@ -33549,8 +94022,8 @@ L180: A := A - v * w' - w * v' */ - dsyr2_(uplo, &i__, &c_b151, &a[(i__ + 1) * a_dim1 + 1], &c__1, - &tau[1], &c__1, &a[a_offset], lda); + ssyr2_(uplo, &i__, &c_b1150, &a[(i__ + 1) * a_dim1 + 1], & + c__1, &tau[1], &c__1, &a[a_offset], lda); a[i__ + (i__ + 1) * a_dim1] = e[i__]; } @@ -33574,30 +94047,30 @@ L180: i__2 = *n - i__; /* Computing MIN */ i__3 = i__ + 2; - dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * + slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ * a_dim1], &c__1, &taui); e[i__] = a[i__ + 1 + i__ * a_dim1]; - if (taui != 0.) 
{ + if (taui != 0.f) { /* Apply H(i) from both sides to A(i+1:n,i+1:n) */ - a[i__ + 1 + i__ * a_dim1] = 1.; + a[i__ + 1 + i__ * a_dim1] = 1.f; /* Compute x := tau * A * v storing y in TAU(i:n-1) */ i__2 = *n - i__; - dsymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b29, &tau[ - i__], &c__1); + ssymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b1101, & + tau[i__], &c__1); /* Compute w := x - 1/2 * tau * (x'*v) * v */ i__2 = *n - i__; - alpha = taui * -.5 * ddot_(&i__2, &tau[i__], &c__1, &a[i__ + + alpha = taui * -.5f * sdot_(&i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1); i__2 = *n - i__; - daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ + saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[ i__], &c__1); /* @@ -33606,7 +94079,7 @@ L180: */ i__2 = *n - i__; - dsyr2_(uplo, &i__2, &c_b151, &a[i__ + 1 + i__ * a_dim1], & + ssyr2_(uplo, &i__2, &c_b1150, &a[i__ + 1 + i__ * a_dim1], & c__1, &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], lda); @@ -33621,13 +94094,13 @@ L180: return 0; -/* End of DSYTD2 */ +/* End of SSYTD2 */ -} /* dsytd2_ */ +} /* ssytd2_ */ -/* Subroutine */ int dsytrd_(char *uplo, integer *n, doublereal *a, integer * - lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal * - work, integer *lwork, integer *info) +/* Subroutine */ int ssytrd_(char *uplo, integer *n, real *a, integer *lda, + real *d__, real *e, real *tau, real *work, integer *lwork, integer * + info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; @@ -33637,15 +94110,15 @@ L180: extern logical lsame_(char *, char *); static integer nbmin, iinfo; static logical upper; - extern /* Subroutine */ int dsytd2_(char *, integer *, doublereal *, - integer *, doublereal *, doublereal *, doublereal *, integer *), dsyr2k_(char *, char *, integer *, integer *, doublereal - *, doublereal *, integer *, doublereal 
*, integer *, doublereal *, - doublereal *, integer *), dlatrd_(char *, - integer *, integer *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, integer *), xerbla_(char *, - integer *); + extern /* Subroutine */ int ssytd2_(char *, integer *, real *, integer *, + real *, real *, real *, integer *), ssyr2k_(char *, char * + , integer *, integer *, real *, real *, integer *, real *, + integer *, real *, real *, integer *), xerbla_( + char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int slatrd_(char *, integer *, integer *, real *, + integer *, real *, real *, real *, integer *); static integer ldwork, lwkopt; static logical lquery; @@ -33660,7 +94133,7 @@ L180: Purpose ======= - DSYTRD reduces a real symmetric matrix A to real symmetric + SSYTRD reduces a real symmetric matrix A to real symmetric tridiagonal form T by an orthogonal similarity transformation: Q**T * A * Q = T. @@ -33674,7 +94147,7 @@ L180: N (input) INTEGER The order of the matrix A. N >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,N) + A (input/output) REAL array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower @@ -33696,19 +94169,19 @@ L180: LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). - D (output) DOUBLE PRECISION array, dimension (N) + D (output) REAL array, dimension (N) The diagonal elements of the tridiagonal matrix T: D(i) = A(i,i). - E (output) DOUBLE PRECISION array, dimension (N-1) + E (output) REAL array, dimension (N-1) The off-diagonal elements of the tridiagonal matrix T: E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. 
- TAU (output) DOUBLE PRECISION array, dimension (N-1) + TAU (output) REAL array, dimension (N-1) The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -33801,15 +94274,15 @@ L180: /* Determine the block size. */ - nb = ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, + nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); lwkopt = *n * nb; - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; } if (*info != 0) { i__1 = -(*info); - xerbla_("DSYTRD", &i__1); + xerbla_("SSYTRD", &i__1); return 0; } else if (lquery) { return 0; @@ -33818,7 +94291,7 @@ L180: /* Quick return if possible */ if (*n == 0) { - work[1] = 1.; + work[1] = 1.f; return 0; } @@ -33832,7 +94305,7 @@ L180: Computing MAX */ - i__1 = nb, i__2 = ilaenv_(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, & + i__1 = nb, i__2 = ilaenv_(&c__3, "SSYTRD", uplo, n, &c_n1, &c_n1, & c_n1, (ftnlen)6, (ftnlen)1); nx = max(i__1,i__2); if (nx < *n) { @@ -33852,7 +94325,7 @@ L180: */ i__1 = *lwork / ldwork; nb = max(i__1,1); - nbmin = ilaenv_(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, + nbmin = ilaenv_(&c__2, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); if (nb < nbmin) { nx = *n; @@ -33885,7 +94358,7 @@ L180: */ i__3 = i__ + nb - 1; - dlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & + slatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & work[1], &ldwork); /* @@ -33894,9 +94367,9 @@ L180: */ i__3 = i__ - 1; - dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b151, &a[i__ * - a_dim1 + 1], lda, &work[1], &ldwork, &c_b15, &a[a_offset], - lda); + ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b1150, &a[i__ * + a_dim1 + 1], lda, &work[1], &ldwork, &c_b871, &a[a_offset] + , lda); /* Copy superdiagonal elements back into A, 
and diagonal @@ -33914,7 +94387,7 @@ L180: /* Use unblocked code to reduce the last or only block */ - dsytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); + ssytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); } else { /* Reduce the lower triangle of A */ @@ -33930,7 +94403,7 @@ L180: */ i__3 = *n - i__ + 1; - dlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & + slatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], & tau[i__], &work[1], &ldwork); /* @@ -33939,8 +94412,8 @@ L180: */ i__3 = *n - i__ - nb + 1; - dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b151, &a[i__ + nb + - i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b15, &a[ + ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b1150, &a[i__ + nb + + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b871, &a[ i__ + nb + (i__ + nb) * a_dim1], lda); /* @@ -33960,71 +94433,65 @@ L180: /* Use unblocked code to reduce the last or only block */ i__1 = *n - i__ + 1; - dsytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], + ssytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &tau[i__], &iinfo); } - work[1] = (doublereal) lwkopt; + work[1] = (real) lwkopt; return 0; -/* End of DSYTRD */ +/* End of SSYTRD */ -} /* dsytrd_ */ +} /* ssytrd_ */ -/* Subroutine */ int dtrevc_(char *side, char *howmny, logical *select, - integer *n, doublereal *t, integer *ldt, doublereal *vl, integer * - ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m, - doublereal *work, integer *info) +/* Subroutine */ int strevc_(char *side, char *howmny, logical *select, + integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr, + integer *ldvr, integer *mm, integer *m, real *work, integer *info) { /* System generated locals */ integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3; - doublereal d__1, d__2, d__3, d__4; + real r__1, r__2, r__3, r__4; /* Builtin functions */ double sqrt(doublereal); /* Local 
variables */ static integer i__, j, k; - static doublereal x[4] /* was [2][2] */; + static real x[4] /* was [2][2] */; static integer j1, j2, n2, ii, ki, ip, is; - static doublereal wi, wr, rec, ulp, beta, emax; - static logical pair; - extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, - integer *); - static logical allv; + static real wi, wr, rec, ulp, beta, emax; + static logical pair, allv; static integer ierr; - static doublereal unfl, ovfl, smin; + static real unfl, ovfl, smin; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); static logical over; - static doublereal vmax; + static real vmax; static integer jnxt; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); - static doublereal scale; + static real scale; extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - static doublereal remax; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static logical leftv, bothv; - extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, - integer *, doublereal *, integer *); - static doublereal vcrit; + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static real remax; + static logical leftv; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *); + static logical bothv; + static real vcrit; static logical somev; - static doublereal xnorm; - extern /* Subroutine */ int dlaln2_(logical *, integer *, integer *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, doublereal * - , doublereal *, integer *, doublereal *, doublereal *, integer *), - dlabad_(doublereal *, doublereal 
*); - - extern integer idamax_(integer *, doublereal *, integer *); + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static real xnorm; + extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, + real *, integer *), slaln2_(logical *, integer *, integer *, real + *, real *, real *, integer *, real *, real *, real *, integer *, + real *, real *, real *, integer *, real *, real *, integer *), + slabad_(real *, real *); + extern doublereal slamch_(char *); extern /* Subroutine */ int xerbla_(char *, integer *); - static doublereal bignum; + static real bignum; + extern integer isamax_(integer *, real *, integer *); static logical rightv; - static doublereal smlnum; + static real smlnum; /* @@ -34037,7 +94504,7 @@ L180: Purpose ======= - DTREVC computes some or all of the right and/or left eigenvectors of + STREVC computes some or all of the right and/or left eigenvectors of a real upper quasi-triangular matrix T. The right eigenvector x and the left eigenvector y of T corresponding @@ -34054,7 +94521,7 @@ L180: original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of right or left eigenvectors of A. - T must be in Schur canonical form (as returned by DHSEQR), that is, + T must be in Schur canonical form (as returned by SHSEQR), that is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block has its diagonal elements equal and its off-diagonal elements of opposite sign. Corresponding to each 2-by-2 @@ -34092,16 +94559,16 @@ L180: N (input) INTEGER The order of the matrix T. N >= 0. - T (input) DOUBLE PRECISION array, dimension (LDT,N) + T (input) REAL array, dimension (LDT,N) The upper quasi-triangular matrix T in Schur canonical form. LDT (input) INTEGER The leading dimension of the array T. LDT >= max(1,N). 
- VL (input/output) DOUBLE PRECISION array, dimension (LDVL,MM) + VL (input/output) REAL array, dimension (LDVL,MM) On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must contain an N-by-N matrix Q (usually the orthogonal matrix Q - of Schur vectors returned by DHSEQR). + of Schur vectors returned by SHSEQR). On exit, if SIDE = 'L' or 'B', VL contains: if HOWMNY = 'A', the matrix Y of left eigenvectors of T; VL has the same quasi-lower triangular form @@ -34127,10 +94594,10 @@ L180: The leading dimension of the array VL. LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. - VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) + VR (input/output) REAL array, dimension (LDVR,MM) On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must contain an N-by-N matrix Q (usually the orthogonal matrix Q - of Schur vectors returned by DHSEQR). + of Schur vectors returned by SHSEQR). On exit, if SIDE = 'R' or 'B', VR contains: if HOWMNY = 'A', the matrix X of right eigenvectors of T; VR has the same quasi-upper triangular form @@ -34166,7 +94633,7 @@ L180: Each selected real eigenvector occupies one column and each selected complex eigenvector occupies two columns. - WORK (workspace) DOUBLE PRECISION array, dimension (3*N) + WORK (workspace) REAL array, dimension (3*N) INFO (output) INTEGER = 0: successful exit @@ -34242,7 +94709,7 @@ L180: select[j] = FALSE_; } else { if (j < *n) { - if (t[j + 1 + j * t_dim1] == 0.) { + if (t[j + 1 + j * t_dim1] == 0.f) { if (select[j]) { ++(*m); } @@ -34271,7 +94738,7 @@ L180: } if (*info != 0) { i__1 = -(*info); - xerbla_("DTREVC", &i__1); + xerbla_("STREVC", &i__1); return 0; } @@ -34283,25 +94750,25 @@ L180: /* Set the constants to control overflow. */ - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; + unfl = slamch_("Safe minimum"); + ovfl = 1.f / unfl; + slabad_(&unfl, &ovfl); + ulp = slamch_("Precision"); smlnum = unfl * (*n / ulp); - bignum = (1. 
- ulp) / smlnum; + bignum = (1.f - ulp) / smlnum; /* Compute 1-norm of each column of strictly upper triangular part of T to control overflow in triangular solver. */ - work[1] = 0.; + work[1] = 0.f; i__1 = *n; for (j = 2; j <= i__1; ++j) { - work[j] = 0.; + work[j] = 0.f; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { - work[j] += (d__1 = t[i__ + j * t_dim1], abs(d__1)); + work[j] += (r__1 = t[i__ + j * t_dim1], dabs(r__1)); /* L20: */ } /* L30: */ @@ -34330,7 +94797,7 @@ L180: if (ki == 1) { goto L40; } - if (t[ki + (ki - 1) * t_dim1] == 0.) { + if (t[ki + (ki - 1) * t_dim1] == 0.f) { goto L40; } ip = -1; @@ -34351,20 +94818,20 @@ L40: /* Compute the KI-th eigenvalue (WR,WI). */ wr = t[ki + ki * t_dim1]; - wi = 0.; + wi = 0.f; if (ip != 0) { - wi = sqrt((d__1 = t[ki + (ki - 1) * t_dim1], abs(d__1))) * - sqrt((d__2 = t[ki - 1 + ki * t_dim1], abs(d__2))); + wi = sqrt((r__1 = t[ki + (ki - 1) * t_dim1], dabs(r__1))) * + sqrt((r__2 = t[ki - 1 + ki * t_dim1], dabs(r__2))); } /* Computing MAX */ - d__1 = ulp * (abs(wr) + abs(wi)); - smin = max(d__1,smlnum); + r__1 = ulp * (dabs(wr) + dabs(wi)); + smin = dmax(r__1,smlnum); if (ip == 0) { /* Real right eigenvector */ - work[ki + *n] = 1.; + work[ki + *n] = 1.f; /* Form right-hand side */ @@ -34384,1606 +94851,1209 @@ L40: if (j > jnxt) { goto L60; } - j1 = j; - j2 = j; - jnxt = j - 1; - if (j > 1) { - if (t[j + (j - 1) * t_dim1] != 0.) { - j1 = j - 1; - jnxt = j - 2; - } - } - - if (j1 == j2) { - -/* 1-by-1 diagonal block */ - - dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b15, &t[j + - j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * - n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, - &ierr); - -/* - Scale X(1,1) to avoid overflow when updating - the right-hand side. -*/ - - if (xnorm > 1.) { - if (work[j] > bignum / xnorm) { - x[0] /= xnorm; - scale /= xnorm; - } - } - -/* Scale if necessary */ - - if (scale != 1.) 
{ - dscal_(&ki, &scale, &work[*n + 1], &c__1); - } - work[j + *n] = x[0]; - -/* Update right-hand side */ - - i__1 = j - 1; - d__1 = -x[0]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - - } else { - -/* 2-by-2 diagonal block */ - - dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b15, &t[j - - 1 + (j - 1) * t_dim1], ldt, &c_b15, &c_b15, & - work[j - 1 + *n], n, &wr, &c_b29, x, &c__2, & - scale, &xnorm, &ierr); - -/* - Scale X(1,1) and X(2,1) to avoid overflow when - updating the right-hand side. -*/ - - if (xnorm > 1.) { -/* Computing MAX */ - d__1 = work[j - 1], d__2 = work[j]; - beta = max(d__1,d__2); - if (beta > bignum / xnorm) { - x[0] /= xnorm; - x[1] /= xnorm; - scale /= xnorm; - } - } - -/* Scale if necessary */ - - if (scale != 1.) { - dscal_(&ki, &scale, &work[*n + 1], &c__1); - } - work[j - 1 + *n] = x[0]; - work[j + *n] = x[1]; - -/* Update right-hand side */ - - i__1 = j - 2; - d__1 = -x[0]; - daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, - &work[*n + 1], &c__1); - i__1 = j - 2; - d__1 = -x[1]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - } -L60: - ; - } - -/* Copy the vector x or Q*x to VR and normalize. */ - - if (! over) { - dcopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & - c__1); - - ii = idamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); - remax = 1. / (d__1 = vr[ii + is * vr_dim1], abs(d__1)); - dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); - - i__1 = *n; - for (k = ki + 1; k <= i__1; ++k) { - vr[k + is * vr_dim1] = 0.; -/* L70: */ - } - } else { - if (ki > 1) { - i__1 = ki - 1; - dgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & - work[*n + 1], &c__1, &work[ki + *n], &vr[ki * - vr_dim1 + 1], &c__1); - } - - ii = idamax_(n, &vr[ki * vr_dim1 + 1], &c__1); - remax = 1. / (d__1 = vr[ii + ki * vr_dim1], abs(d__1)); - dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); - } - - } else { - -/* - Complex right eigenvector. 
- - Initial solve - [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. - [ (T(KI,KI-1) T(KI,KI) ) ] -*/ - - if ((d__1 = t[ki - 1 + ki * t_dim1], abs(d__1)) >= (d__2 = t[ - ki + (ki - 1) * t_dim1], abs(d__2))) { - work[ki - 1 + *n] = 1.; - work[ki + n2] = wi / t[ki - 1 + ki * t_dim1]; - } else { - work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1]; - work[ki + n2] = 1.; - } - work[ki + *n] = 0.; - work[ki - 1 + n2] = 0.; - -/* Form right-hand side */ - - i__1 = ki - 2; - for (k = 1; k <= i__1; ++k) { - work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) * - t_dim1]; - work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1]; -/* L80: */ - } - -/* - Solve upper quasi-triangular system: - (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) -*/ - - jnxt = ki - 2; - for (j = ki - 2; j >= 1; --j) { - if (j > jnxt) { - goto L90; - } - j1 = j; - j2 = j; - jnxt = j - 1; - if (j > 1) { - if (t[j + (j - 1) * t_dim1] != 0.) { - j1 = j - 1; - jnxt = j - 2; - } - } - - if (j1 == j2) { - -/* 1-by-1 diagonal block */ - - dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b15, &t[j + - j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * - n], n, &wr, &wi, x, &c__2, &scale, &xnorm, & - ierr); - -/* - Scale X(1,1) and X(1,2) to avoid overflow when - updating the right-hand side. -*/ - - if (xnorm > 1.) { - if (work[j] > bignum / xnorm) { - x[0] /= xnorm; - x[2] /= xnorm; - scale /= xnorm; - } - } - -/* Scale if necessary */ - - if (scale != 1.) 
{ - dscal_(&ki, &scale, &work[*n + 1], &c__1); - dscal_(&ki, &scale, &work[n2 + 1], &c__1); - } - work[j + *n] = x[0]; - work[j + n2] = x[2]; - -/* Update the right-hand side */ - - i__1 = j - 1; - d__1 = -x[0]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - i__1 = j - 1; - d__1 = -x[2]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - n2 + 1], &c__1); - - } else { - -/* 2-by-2 diagonal block */ - - dlaln2_(&c_false, &c__2, &c__2, &smin, &c_b15, &t[j - - 1 + (j - 1) * t_dim1], ldt, &c_b15, &c_b15, & - work[j - 1 + *n], n, &wr, &wi, x, &c__2, & - scale, &xnorm, &ierr); - -/* - Scale X to avoid overflow when updating - the right-hand side. -*/ - - if (xnorm > 1.) { -/* Computing MAX */ - d__1 = work[j - 1], d__2 = work[j]; - beta = max(d__1,d__2); - if (beta > bignum / xnorm) { - rec = 1. / xnorm; - x[0] *= rec; - x[2] *= rec; - x[1] *= rec; - x[3] *= rec; - scale *= rec; - } - } - -/* Scale if necessary */ - - if (scale != 1.) { - dscal_(&ki, &scale, &work[*n + 1], &c__1); - dscal_(&ki, &scale, &work[n2 + 1], &c__1); - } - work[j - 1 + *n] = x[0]; - work[j + *n] = x[1]; - work[j - 1 + n2] = x[2]; - work[j + n2] = x[3]; - -/* Update the right-hand side */ - - i__1 = j - 2; - d__1 = -x[0]; - daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, - &work[*n + 1], &c__1); - i__1 = j - 2; - d__1 = -x[1]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - *n + 1], &c__1); - i__1 = j - 2; - d__1 = -x[2]; - daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1, - &work[n2 + 1], &c__1); - i__1 = j - 2; - d__1 = -x[3]; - daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[ - n2 + 1], &c__1); - } -L90: - ; - } - -/* Copy the vector x or Q*x to VR and normalize. */ - - if (! 
over) { - dcopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 - + 1], &c__1); - dcopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & - c__1); - - emax = 0.; - i__1 = ki; - for (k = 1; k <= i__1; ++k) { -/* Computing MAX */ - d__3 = emax, d__4 = (d__1 = vr[k + (is - 1) * vr_dim1] - , abs(d__1)) + (d__2 = vr[k + is * vr_dim1], - abs(d__2)); - emax = max(d__3,d__4); -/* L100: */ - } - - remax = 1. / emax; - dscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); - dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); - - i__1 = *n; - for (k = ki + 1; k <= i__1; ++k) { - vr[k + (is - 1) * vr_dim1] = 0.; - vr[k + is * vr_dim1] = 0.; -/* L110: */ - } - - } else { - - if (ki > 2) { - i__1 = ki - 2; - dgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & - work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[( - ki - 1) * vr_dim1 + 1], &c__1); - i__1 = ki - 2; - dgemv_("N", n, &i__1, &c_b15, &vr[vr_offset], ldvr, & - work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * - vr_dim1 + 1], &c__1); - } else { - dscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 - + 1], &c__1); - dscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & - c__1); - } - - emax = 0.; - i__1 = *n; - for (k = 1; k <= i__1; ++k) { -/* Computing MAX */ - d__3 = emax, d__4 = (d__1 = vr[k + (ki - 1) * vr_dim1] - , abs(d__1)) + (d__2 = vr[k + ki * vr_dim1], - abs(d__2)); - emax = max(d__3,d__4); -/* L120: */ - } - remax = 1. / emax; - dscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); - dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); - } - } - - --is; - if (ip != 0) { - --is; - } -L130: - if (ip == 1) { - ip = 0; - } - if (ip == -1) { - ip = 1; - } -/* L140: */ - } - } - - if (leftv) { - -/* Compute left eigenvectors. */ - - ip = 0; - is = 1; - i__1 = *n; - for (ki = 1; ki <= i__1; ++ki) { - - if (ip == -1) { - goto L250; - } - if (ki == *n) { - goto L150; - } - if (t[ki + 1 + ki * t_dim1] == 0.) { - goto L150; - } - ip = 1; - -L150: - if (somev) { - if (! 
select[ki]) { - goto L250; - } - } - -/* Compute the KI-th eigenvalue (WR,WI). */ - - wr = t[ki + ki * t_dim1]; - wi = 0.; - if (ip != 0) { - wi = sqrt((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1))) * - sqrt((d__2 = t[ki + 1 + ki * t_dim1], abs(d__2))); - } -/* Computing MAX */ - d__1 = ulp * (abs(wr) + abs(wi)); - smin = max(d__1,smlnum); - - if (ip == 0) { - -/* Real left eigenvector. */ - - work[ki + *n] = 1.; - -/* Form right-hand side */ - - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - work[k + *n] = -t[ki + k * t_dim1]; -/* L160: */ - } - -/* - Solve the quasi-triangular system: - (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK -*/ - - vmax = 1.; - vcrit = bignum; - - jnxt = ki + 1; - i__2 = *n; - for (j = ki + 1; j <= i__2; ++j) { - if (j < jnxt) { - goto L170; - } - j1 = j; - j2 = j; - jnxt = j + 1; - if (j < *n) { - if (t[j + 1 + j * t_dim1] != 0.) { - j2 = j + 1; - jnxt = j + 2; + j1 = j; + j2 = j; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.f) { + j1 = j - 1; + jnxt = j - 2; } } if (j1 == j2) { -/* - 1-by-1 diagonal block +/* 1-by-1 diagonal block */ - Scale if necessary to avoid overflow when forming + slaln2_(&c_false, &c__1, &c__1, &smin, &c_b871, &t[j + + j * t_dim1], ldt, &c_b871, &c_b871, &work[j + + *n], n, &wr, &c_b1101, x, &c__2, &scale, & + xnorm, &ierr); + +/* + Scale X(1,1) to avoid overflow when updating the right-hand side. */ - if (work[j] > vcrit) { - rec = 1. / vmax; - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + *n], &c__1); - vmax = 1.; - vcrit = bignum; + if (xnorm > 1.f) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + scale /= xnorm; + } } - i__3 = j - ki - 1; - work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], - &c__1, &work[ki + 1 + *n], &c__1); - -/* Solve (T(J,J)-WR)'*X = WORK */ - - dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b15, &t[j + - j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * - n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, - &ierr); - /* Scale if necessary */ - if (scale != 1.) 
{ - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + *n], &c__1); + if (scale != 1.f) { + sscal_(&ki, &scale, &work[*n + 1], &c__1); } work[j + *n] = x[0]; -/* Computing MAX */ - d__2 = (d__1 = work[j + *n], abs(d__1)); - vmax = max(d__2,vmax); - vcrit = bignum / vmax; - - } else { -/* - 2-by-2 diagonal block +/* Update right-hand side */ - Scale if necessary to avoid overflow when forming - the right-hand side. + i__1 = j - 1; + r__1 = -x[0]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); - Computing MAX -*/ - d__1 = work[j], d__2 = work[j + 1]; - beta = max(d__1,d__2); - if (beta > vcrit) { - rec = 1. / vmax; - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + *n], &c__1); - vmax = 1.; - vcrit = bignum; - } + } else { - i__3 = j - ki - 1; - work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1], - &c__1, &work[ki + 1 + *n], &c__1); +/* 2-by-2 diagonal block */ - i__3 = j - ki - 1; - work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 1 + (j + 1) * - t_dim1], &c__1, &work[ki + 1 + *n], &c__1); + slaln2_(&c_false, &c__2, &c__1, &smin, &c_b871, &t[j + - 1 + (j - 1) * t_dim1], ldt, &c_b871, & + c_b871, &work[j - 1 + *n], n, &wr, &c_b1101, + x, &c__2, &scale, &xnorm, &ierr); /* - Solve - [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) - [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) + Scale X(1,1) and X(2,1) to avoid overflow when + updating the right-hand side. */ - dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b15, &t[j + - j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * - n], n, &wr, &c_b29, x, &c__2, &scale, &xnorm, - &ierr); + if (xnorm > 1.f) { +/* Computing MAX */ + r__1 = work[j - 1], r__2 = work[j]; + beta = dmax(r__1,r__2); + if (beta > bignum / xnorm) { + x[0] /= xnorm; + x[1] /= xnorm; + scale /= xnorm; + } + } /* Scale if necessary */ - if (scale != 1.) 
{ - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + *n], &c__1); + if (scale != 1.f) { + sscal_(&ki, &scale, &work[*n + 1], &c__1); } - work[j + *n] = x[0]; - work[j + 1 + *n] = x[1]; + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; -/* Computing MAX */ - d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 - = work[j + 1 + *n], abs(d__2)), d__3 = max( - d__3,d__4); - vmax = max(d__3,vmax); - vcrit = bignum / vmax; +/* Update right-hand side */ + i__1 = j - 2; + r__1 = -x[0]; + saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + r__1 = -x[1]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); } -L170: +L60: ; } -/* Copy the vector x or Q*x to VL and normalize. */ +/* Copy the vector x or Q*x to VR and normalize. */ if (! over) { - i__2 = *n - ki + 1; - dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * - vl_dim1], &c__1); + scopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); - i__2 = *n - ki + 1; - ii = idamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - - 1; - remax = 1. / (d__1 = vl[ii + is * vl_dim1], abs(d__1)); - i__2 = *n - ki + 1; - dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + ii = isamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + remax = 1.f / (r__1 = vr[ii + is * vr_dim1], dabs(r__1)); + sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); - i__2 = ki - 1; - for (k = 1; k <= i__2; ++k) { - vl[k + is * vl_dim1] = 0.; -/* L180: */ + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + is * vr_dim1] = 0.f; +/* L70: */ } - } else { - - if (ki < *n) { - i__2 = *n - ki; - dgemv_("N", n, &i__2, &c_b15, &vl[(ki + 1) * vl_dim1 - + 1], ldvl, &work[ki + 1 + *n], &c__1, &work[ - ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + if (ki > 1) { + i__1 = ki - 1; + sgemv_("N", n, &i__1, &c_b871, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki + *n], &vr[ki * + vr_dim1 + 1], &c__1); } - ii = idamax_(n, &vl[ki * vl_dim1 + 1], &c__1); - remax = 1. 
/ (d__1 = vl[ii + ki * vl_dim1], abs(d__1)); - dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - + ii = isamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + remax = 1.f / (r__1 = vr[ii + ki * vr_dim1], dabs(r__1)); + sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); } } else { /* - Complex left eigenvector. + Complex right eigenvector. - Initial solve: - ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. - ((T(KI+1,KI) T(KI+1,KI+1)) ) + Initial solve + [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. + [ (T(KI,KI-1) T(KI,KI) ) ] */ - if ((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1)) >= (d__2 = - t[ki + 1 + ki * t_dim1], abs(d__2))) { - work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1]; - work[ki + 1 + n2] = 1.; + if ((r__1 = t[ki - 1 + ki * t_dim1], dabs(r__1)) >= (r__2 = t[ + ki + (ki - 1) * t_dim1], dabs(r__2))) { + work[ki - 1 + *n] = 1.f; + work[ki + n2] = wi / t[ki - 1 + ki * t_dim1]; } else { - work[ki + *n] = 1.; - work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1]; + work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1]; + work[ki + n2] = 1.f; } - work[ki + 1 + *n] = 0.; - work[ki + n2] = 0.; + work[ki + *n] = 0.f; + work[ki - 1 + n2] = 0.f; /* Form right-hand side */ - i__2 = *n; - for (k = ki + 2; k <= i__2; ++k) { - work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1]; - work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1] - ; -/* L190: */ + i__1 = ki - 2; + for (k = 1; k <= i__1; ++k) { + work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) * + t_dim1]; + work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1]; +/* L80: */ } /* - Solve complex quasi-triangular system: - ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 + Solve upper quasi-triangular system: + (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) */ - vmax = 1.; - vcrit = bignum; - - jnxt = ki + 2; - i__2 = *n; - for (j = ki + 2; j <= i__2; ++j) { - if (j < jnxt) { - goto L200; + jnxt = ki - 2; + for (j = ki - 2; j >= 1; --j) { + if (j > jnxt) { + goto L90; } j1 = j; j2 = j; - jnxt = j + 1; - if (j < 
*n) { - if (t[j + 1 + j * t_dim1] != 0.) { - j2 = j + 1; - jnxt = j + 2; + jnxt = j - 1; + if (j > 1) { + if (t[j + (j - 1) * t_dim1] != 0.f) { + j1 = j - 1; + jnxt = j - 2; } } if (j1 == j2) { -/* - 1-by-1 diagonal block +/* 1-by-1 diagonal block */ - Scale if necessary to avoid overflow when - forming the right-hand side elements. + slaln2_(&c_false, &c__1, &c__2, &smin, &c_b871, &t[j + + j * t_dim1], ldt, &c_b871, &c_b871, &work[j + + *n], n, &wr, &wi, x, &c__2, &scale, &xnorm, + &ierr); + +/* + Scale X(1,1) and X(1,2) to avoid overflow when + updating the right-hand side. */ - if (work[j] > vcrit) { - rec = 1. / vmax; - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + n2], &c__1); - vmax = 1.; - vcrit = bignum; + if (xnorm > 1.f) { + if (work[j] > bignum / xnorm) { + x[0] /= xnorm; + x[2] /= xnorm; + scale /= xnorm; + } } - i__3 = j - ki - 2; - work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + *n], &c__1); - i__3 = j - ki - 2; - work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + n2], &c__1); - -/* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */ - - d__1 = -wi; - dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b15, &t[j + - j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * - n], n, &wr, &d__1, x, &c__2, &scale, &xnorm, & - ierr); - /* Scale if necessary */ - if (scale != 1.) 
{ - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + n2], &c__1); + if (scale != 1.f) { + sscal_(&ki, &scale, &work[*n + 1], &c__1); + sscal_(&ki, &scale, &work[n2 + 1], &c__1); } work[j + *n] = x[0]; work[j + n2] = x[2]; -/* Computing MAX */ - d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2 - = work[j + n2], abs(d__2)), d__3 = max(d__3, - d__4); - vmax = max(d__3,vmax); - vcrit = bignum / vmax; - - } else { - -/* - 2-by-2 diagonal block - - Scale if necessary to avoid overflow when forming - the right-hand side elements. - Computing MAX -*/ - d__1 = work[j], d__2 = work[j + 1]; - beta = max(d__1,d__2); - if (beta > vcrit) { - rec = 1. / vmax; - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - dscal_(&i__3, &rec, &work[ki + n2], &c__1); - vmax = 1.; - vcrit = bignum; - } +/* Update the right-hand side */ - i__3 = j - ki - 2; - work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + *n], &c__1); + i__1 = j - 1; + r__1 = -x[0]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 1; + r__1 = -x[2]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); - i__3 = j - ki - 2; - work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1], - &c__1, &work[ki + 2 + n2], &c__1); + } else { - i__3 = j - ki - 2; - work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * - t_dim1], &c__1, &work[ki + 2 + *n], &c__1); +/* 2-by-2 diagonal block */ - i__3 = j - ki - 2; - work[j + 1 + n2] -= ddot_(&i__3, &t[ki + 2 + (j + 1) * - t_dim1], &c__1, &work[ki + 2 + n2], &c__1); + slaln2_(&c_false, &c__2, &c__2, &smin, &c_b871, &t[j + - 1 + (j - 1) * t_dim1], ldt, &c_b871, & + c_b871, &work[j - 1 + *n], n, &wr, &wi, x, & + c__2, &scale, &xnorm, &ierr); /* - Solve 2-by-2 complex linear equation - ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B - ([T(j+1,j) T(j+1,j+1)] ) + Scale X to avoid overflow when 
updating + the right-hand side. */ - d__1 = -wi; - dlaln2_(&c_true, &c__2, &c__2, &smin, &c_b15, &t[j + - j * t_dim1], ldt, &c_b15, &c_b15, &work[j + * - n], n, &wr, &d__1, x, &c__2, &scale, &xnorm, & - ierr); + if (xnorm > 1.f) { +/* Computing MAX */ + r__1 = work[j - 1], r__2 = work[j]; + beta = dmax(r__1,r__2); + if (beta > bignum / xnorm) { + rec = 1.f / xnorm; + x[0] *= rec; + x[2] *= rec; + x[1] *= rec; + x[3] *= rec; + scale *= rec; + } + } /* Scale if necessary */ - if (scale != 1.) { - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + *n], &c__1); - i__3 = *n - ki + 1; - dscal_(&i__3, &scale, &work[ki + n2], &c__1); + if (scale != 1.f) { + sscal_(&ki, &scale, &work[*n + 1], &c__1); + sscal_(&ki, &scale, &work[n2 + 1], &c__1); } - work[j + *n] = x[0]; - work[j + n2] = x[2]; - work[j + 1 + *n] = x[1]; - work[j + 1 + n2] = x[3]; -/* Computing MAX */ - d__1 = abs(x[0]), d__2 = abs(x[2]), d__1 = max(d__1, - d__2), d__2 = abs(x[1]), d__1 = max(d__1,d__2) - , d__2 = abs(x[3]), d__1 = max(d__1,d__2); - vmax = max(d__1,vmax); - vcrit = bignum / vmax; + work[j - 1 + *n] = x[0]; + work[j + *n] = x[1]; + work[j - 1 + n2] = x[2]; + work[j + n2] = x[3]; + +/* Update the right-hand side */ + i__1 = j - 2; + r__1 = -x[0]; + saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[*n + 1], &c__1); + i__1 = j - 2; + r__1 = -x[1]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + *n + 1], &c__1); + i__1 = j - 2; + r__1 = -x[2]; + saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1, + &work[n2 + 1], &c__1); + i__1 = j - 2; + r__1 = -x[3]; + saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[ + n2 + 1], &c__1); } -L200: +L90: ; } -/* - Copy the vector x or Q*x to VL and normalize. +/* Copy the vector x or Q*x to VR and normalize. */ - L210: -*/ if (! 
over) { - i__2 = *n - ki + 1; - dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * - vl_dim1], &c__1); - i__2 = *n - ki + 1; - dcopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * - vl_dim1], &c__1); + scopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1 + + 1], &c__1); + scopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], & + c__1); - emax = 0.; - i__2 = *n; - for (k = ki; k <= i__2; ++k) { + emax = 0.f; + i__1 = ki; + for (k = 1; k <= i__1; ++k) { /* Computing MAX */ - d__3 = emax, d__4 = (d__1 = vl[k + is * vl_dim1], abs( - d__1)) + (d__2 = vl[k + (is + 1) * vl_dim1], - abs(d__2)); - emax = max(d__3,d__4); -/* L220: */ + r__3 = emax, r__4 = (r__1 = vr[k + (is - 1) * vr_dim1] + , dabs(r__1)) + (r__2 = vr[k + is * vr_dim1], + dabs(r__2)); + emax = dmax(r__3,r__4); +/* L100: */ } - remax = 1. / emax; - i__2 = *n - ki + 1; - dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - i__2 = *n - ki + 1; - dscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) - ; - i__2 = ki - 1; - for (k = 1; k <= i__2; ++k) { - vl[k + is * vl_dim1] = 0.; - vl[k + (is + 1) * vl_dim1] = 0.; -/* L230: */ + remax = 1.f / emax; + sscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1); + sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + vr[k + (is - 1) * vr_dim1] = 0.f; + vr[k + is * vr_dim1] = 0.f; +/* L110: */ } + } else { - if (ki < *n - 1) { - i__2 = *n - ki - 1; - dgemv_("N", n, &i__2, &c_b15, &vl[(ki + 2) * vl_dim1 - + 1], ldvl, &work[ki + 2 + *n], &c__1, &work[ - ki + *n], &vl[ki * vl_dim1 + 1], &c__1); - i__2 = *n - ki - 1; - dgemv_("N", n, &i__2, &c_b15, &vl[(ki + 2) * vl_dim1 - + 1], ldvl, &work[ki + 2 + n2], &c__1, &work[ - ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + 1], & - c__1); + + if (ki > 2) { + i__1 = ki - 2; + sgemv_("N", n, &i__1, &c_b871, &vr[vr_offset], ldvr, & + work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[( + ki - 1) * vr_dim1 + 1], &c__1); + i__1 = ki - 2; + sgemv_("N", n, &i__1, &c_b871, 
&vr[vr_offset], ldvr, & + work[n2 + 1], &c__1, &work[ki + n2], &vr[ki * + vr_dim1 + 1], &c__1); } else { - dscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & - c__1); - dscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + sscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1 + 1], &c__1); + sscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], & + c__1); } - emax = 0.; - i__2 = *n; - for (k = 1; k <= i__2; ++k) { + emax = 0.f; + i__1 = *n; + for (k = 1; k <= i__1; ++k) { /* Computing MAX */ - d__3 = emax, d__4 = (d__1 = vl[k + ki * vl_dim1], abs( - d__1)) + (d__2 = vl[k + (ki + 1) * vl_dim1], - abs(d__2)); - emax = max(d__3,d__4); -/* L240: */ + r__3 = emax, r__4 = (r__1 = vr[k + (ki - 1) * vr_dim1] + , dabs(r__1)) + (r__2 = vr[k + ki * vr_dim1], + dabs(r__2)); + emax = dmax(r__3,r__4); +/* L120: */ } - remax = 1. / emax; - dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - dscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); - + remax = 1.f / emax; + sscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1); + sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); } - } - ++is; + --is; if (ip != 0) { - ++is; + --is; } -L250: - if (ip == -1) { +L130: + if (ip == 1) { ip = 0; } - if (ip == 1) { - ip = -1; + if (ip == -1) { + ip = 1; } - -/* L260: */ +/* L140: */ } - - } - - return 0; - -/* End of DTREVC */ - -} /* dtrevc_ */ - -integer ieeeck_(integer *ispec, real *zero, real *one) -{ - /* System generated locals */ - integer ret_val; - - /* Local variables */ - static real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, - newzro; - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1998 - - - Purpose - ======= - - IEEECK is called from the ILAENV to verify that Infinity and - possibly NaN arithmetic is safe (i.e. will not trap). 
- - Arguments - ========= - - ISPEC (input) INTEGER - Specifies whether to test just for inifinity arithmetic - or whether to test for infinity and NaN arithmetic. - = 0: Verify infinity arithmetic only. - = 1: Verify infinity and NaN arithmetic. - - ZERO (input) REAL - Must contain the value 0.0 - This is passed to prevent the compiler from optimizing - away this code. - - ONE (input) REAL - Must contain the value 1.0 - This is passed to prevent the compiler from optimizing - away this code. - - RETURN VALUE: INTEGER - = 0: Arithmetic failed to produce the correct answers - = 1: Arithmetic produced the correct answers -*/ - - ret_val = 1; - - posinf = *one / *zero; - if (posinf <= *one) { - ret_val = 0; - return ret_val; - } - - neginf = -(*one) / *zero; - if (neginf >= *zero) { - ret_val = 0; - return ret_val; - } - - negzro = *one / (neginf + *one); - if (negzro != *zero) { - ret_val = 0; - return ret_val; } - neginf = *one / negzro; - if (neginf >= *zero) { - ret_val = 0; - return ret_val; - } + if (leftv) { - newzro = negzro + *zero; - if (newzro != *zero) { - ret_val = 0; - return ret_val; - } +/* Compute left eigenvectors. */ - posinf = *one / newzro; - if (posinf <= *one) { - ret_val = 0; - return ret_val; - } + ip = 0; + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { - neginf *= posinf; - if (neginf >= *zero) { - ret_val = 0; - return ret_val; - } + if (ip == -1) { + goto L250; + } + if (ki == *n) { + goto L150; + } + if (t[ki + 1 + ki * t_dim1] == 0.f) { + goto L150; + } + ip = 1; - posinf *= posinf; - if (posinf <= *one) { - ret_val = 0; - return ret_val; - } +L150: + if (somev) { + if (! select[ki]) { + goto L250; + } + } +/* Compute the KI-th eigenvalue (WR,WI). 
*/ -/* Return if we were only asked to check infinity arithmetic */ + wr = t[ki + ki * t_dim1]; + wi = 0.f; + if (ip != 0) { + wi = sqrt((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1))) * + sqrt((r__2 = t[ki + 1 + ki * t_dim1], dabs(r__2))); + } +/* Computing MAX */ + r__1 = ulp * (dabs(wr) + dabs(wi)); + smin = dmax(r__1,smlnum); - if (*ispec == 0) { - return ret_val; - } + if (ip == 0) { - nan1 = posinf + neginf; +/* Real left eigenvector. */ - nan2 = posinf / neginf; + work[ki + *n] = 1.f; - nan3 = posinf / posinf; +/* Form right-hand side */ - nan4 = posinf * *zero; + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + work[k + *n] = -t[ki + k * t_dim1]; +/* L160: */ + } - nan5 = neginf * negzro; +/* + Solve the quasi-triangular system: + (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK +*/ - nan6 = nan5 * 0.f; + vmax = 1.f; + vcrit = bignum; - if (nan1 == nan1) { - ret_val = 0; - return ret_val; - } + jnxt = ki + 1; + i__2 = *n; + for (j = ki + 1; j <= i__2; ++j) { + if (j < jnxt) { + goto L170; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.f) { + j2 = j + 1; + jnxt = j + 2; + } + } - if (nan2 == nan2) { - ret_val = 0; - return ret_val; - } + if (j1 == j2) { - if (nan3 == nan3) { - ret_val = 0; - return ret_val; - } +/* + 1-by-1 diagonal block - if (nan4 == nan4) { - ret_val = 0; - return ret_val; - } + Scale if necessary to avoid overflow when forming + the right-hand side. 
+*/ - if (nan5 == nan5) { - ret_val = 0; - return ret_val; - } + if (work[j] > vcrit) { + rec = 1.f / vmax; + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.f; + vcrit = bignum; + } - if (nan6 == nan6) { - ret_val = 0; - return ret_val; - } + i__3 = j - ki - 1; + work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); - return ret_val; -} /* ieeeck_ */ +/* Solve (T(J,J)-WR)'*X = WORK */ -integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, - integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen - opts_len) -{ - /* System generated locals */ - integer ret_val; + slaln2_(&c_false, &c__1, &c__1, &smin, &c_b871, &t[j + + j * t_dim1], ldt, &c_b871, &c_b871, &work[j + + *n], n, &wr, &c_b1101, x, &c__2, &scale, & + xnorm, &ierr); - /* Builtin functions */ - /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); - integer s_cmp(char *, char *, ftnlen, ftnlen); +/* Scale if necessary */ - /* Local variables */ - static integer i__; - static char c1[1], c2[2], c3[3], c4[2]; - static integer ic, nb, iz, nx; - static logical cname, sname; - static integer nbmin; - extern integer ieeeck_(integer *, real *, real *); - static char subnam[6]; + if (scale != 1.f) { + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; +/* Computing MAX */ + r__2 = (r__1 = work[j + *n], dabs(r__1)); + vmax = dmax(r__2,vmax); + vcrit = bignum / vmax; + } else { /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + 2-by-2 diagonal block + Scale if necessary to avoid overflow when forming + the right-hand side. 
- Purpose - ======= + Computing MAX +*/ + r__1 = work[j], r__2 = work[j + 1]; + beta = dmax(r__1,r__2); + if (beta > vcrit) { + rec = 1.f / vmax; + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + *n], &c__1); + vmax = 1.f; + vcrit = bignum; + } - ILAENV is called from the LAPACK routines to choose problem-dependent - parameters for the local environment. See ISPEC for a description of - the parameters. + i__3 = j - ki - 1; + work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1], + &c__1, &work[ki + 1 + *n], &c__1); - This version provides a set of parameters which should give good, - but not optimal, performance on many of the currently available - computers. Users are encouraged to modify this subroutine to set - the tuning parameters for their particular machine using the option - and problem size information in the arguments. + i__3 = j - ki - 1; + work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 1 + (j + 1) * + t_dim1], &c__1, &work[ki + 1 + *n], &c__1); - This routine will not function correctly if it is converted to all - lower case. Converting it to all upper case is allowed. +/* + Solve + [T(J,J)-WR T(J,J+1) ]'* X = SCALE*( WORK1 ) + [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) +*/ - Arguments - ========= + slaln2_(&c_true, &c__2, &c__1, &smin, &c_b871, &t[j + + j * t_dim1], ldt, &c_b871, &c_b871, &work[j + + *n], n, &wr, &c_b1101, x, &c__2, &scale, & + xnorm, &ierr); - ISPEC (input) INTEGER - Specifies the parameter to be returned as the value of - ILAENV. - = 1: the optimal blocksize; if this value is 1, an unblocked - algorithm will give the best performance. - = 2: the minimum block size for which the block routine - should be used; if the usable block size is less than - this value, an unblocked routine should be used. 
- = 3: the crossover point (in a block routine, for N less - than this value, an unblocked routine should be used) - = 4: the number of shifts, used in the nonsymmetric - eigenvalue routines - = 5: the minimum column dimension for blocking to be used; - rectangular blocks must have dimension at least k by m, - where k is given by ILAENV(2,...) and m by ILAENV(5,...) - = 6: the crossover point for the SVD (when reducing an m by n - matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds - this value, a QR factorization is used first to reduce - the matrix to a triangular form.) - = 7: the number of processors - = 8: the crossover point for the multishift QR and QZ methods - for nonsymmetric eigenvalue problems. - = 9: maximum size of the subproblems at the bottom of the - computation tree in the divide-and-conquer algorithm - (used by xGELSD and xGESDD) - =10: ieee NaN arithmetic can be trusted not to trap - =11: infinity arithmetic can be trusted not to trap +/* Scale if necessary */ - NAME (input) CHARACTER*(*) - The name of the calling subroutine, in either upper case or - lower case. + if (scale != 1.f) { + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + *n], &c__1); + } + work[j + *n] = x[0]; + work[j + 1 + *n] = x[1]; - OPTS (input) CHARACTER*(*) - The character options to the subroutine NAME, concatenated - into a single character string. For example, UPLO = 'U', - TRANS = 'T', and DIAG = 'N' for a triangular routine would - be specified as OPTS = 'UTN'. +/* Computing MAX */ + r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = ( + r__2 = work[j + 1 + *n], dabs(r__2)), r__3 = + max(r__3,r__4); + vmax = dmax(r__3,vmax); + vcrit = bignum / vmax; - N1 (input) INTEGER - N2 (input) INTEGER - N3 (input) INTEGER - N4 (input) INTEGER - Problem dimensions for the subroutine NAME; these may not all - be required. 
+ } +L170: + ; + } - (ILAENV) (output) INTEGER - >= 0: the value of the parameter specified by ISPEC - < 0: if ILAENV = -k, the k-th argument had an illegal value. +/* Copy the vector x or Q*x to VL and normalize. */ - Further Details - =============== + if (! over) { + i__2 = *n - ki + 1; + scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); - The following conventions have been used when calling ILAENV from the - LAPACK routines: - 1) OPTS is a concatenation of all of the character options to - subroutine NAME, in the same order that they appear in the - argument list for NAME, even if they are not used in determining - the value of the parameter specified by ISPEC. - 2) The problem dimensions N1, N2, N3, N4 are specified in the order - that they appear in the argument list for NAME. N1 is used - first, N2 second, and so on, and unused problem dimensions are - passed a value of -1. - 3) The parameter value returned by ILAENV is checked for validity in - the calling subroutine. 
For example, ILAENV is used to retrieve - the optimal blocksize for STRTRI as follows: + i__2 = *n - ki + 1; + ii = isamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - + 1; + remax = 1.f / (r__1 = vl[ii + is * vl_dim1], dabs(r__1)); + i__2 = *n - ki + 1; + sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) - IF( NB.LE.1 ) NB = MAX( 1, N ) + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.f; +/* L180: */ + } - ===================================================================== -*/ + } else { + if (ki < *n) { + i__2 = *n - ki; + sgemv_("N", n, &i__2, &c_b871, &vl[(ki + 1) * vl_dim1 + + 1], ldvl, &work[ki + 1 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + } - switch (*ispec) { - case 1: goto L100; - case 2: goto L100; - case 3: goto L100; - case 4: goto L400; - case 5: goto L500; - case 6: goto L600; - case 7: goto L700; - case 8: goto L800; - case 9: goto L900; - case 10: goto L1000; - case 11: goto L1100; - } + ii = isamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + remax = 1.f / (r__1 = vl[ii + ki * vl_dim1], dabs(r__1)); + sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); -/* Invalid value for ISPEC */ + } - ret_val = -1; - return ret_val; + } else { -L100: +/* + Complex left eigenvector. -/* Convert NAME to upper case if the first character is lower case. */ + Initial solve: + ((T(KI,KI) T(KI,KI+1) )' - (WR - I* WI))*X = 0. 
+ ((T(KI+1,KI) T(KI+1,KI+1)) ) +*/ - ret_val = 1; - s_copy(subnam, name__, (ftnlen)6, name_len); - ic = *(unsigned char *)subnam; - iz = 'Z'; - if (iz == 90 || iz == 122) { + if ((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1)) >= (r__2 = + t[ki + 1 + ki * t_dim1], dabs(r__2))) { + work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1]; + work[ki + 1 + n2] = 1.f; + } else { + work[ki + *n] = 1.f; + work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1]; + } + work[ki + 1 + *n] = 0.f; + work[ki + n2] = 0.f; -/* ASCII character set */ +/* Form right-hand side */ - if (ic >= 97 && ic <= 122) { - *(unsigned char *)subnam = (char) (ic - 32); - for (i__ = 2; i__ <= 6; ++i__) { - ic = *(unsigned char *)&subnam[i__ - 1]; - if (ic >= 97 && ic <= 122) { - *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + i__2 = *n; + for (k = ki + 2; k <= i__2; ++k) { + work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1]; + work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1] + ; +/* L190: */ } -/* L10: */ - } - } - } else if (iz == 233 || iz == 169) { +/* + Solve complex quasi-triangular system: + ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 +*/ -/* EBCDIC character set */ + vmax = 1.f; + vcrit = bignum; - if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && - ic <= 169) { - *(unsigned char *)subnam = (char) (ic + 64); - for (i__ = 2; i__ <= 6; ++i__) { - ic = *(unsigned char *)&subnam[i__ - 1]; - if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= - 162 && ic <= 169) { - *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); - } -/* L20: */ - } - } + jnxt = ki + 2; + i__2 = *n; + for (j = ki + 2; j <= i__2; ++j) { + if (j < jnxt) { + goto L200; + } + j1 = j; + j2 = j; + jnxt = j + 1; + if (j < *n) { + if (t[j + 1 + j * t_dim1] != 0.f) { + j2 = j + 1; + jnxt = j + 2; + } + } - } else if (iz == 218 || iz == 250) { + if (j1 == j2) { -/* Prime machines: ASCII+128 */ +/* + 1-by-1 diagonal block - if (ic >= 225 && ic <= 250) { - *(unsigned char *)subnam = (char) (ic 
- 32); - for (i__ = 2; i__ <= 6; ++i__) { - ic = *(unsigned char *)&subnam[i__ - 1]; - if (ic >= 225 && ic <= 250) { - *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); - } -/* L30: */ - } - } - } + Scale if necessary to avoid overflow when + forming the right-hand side elements. +*/ - *(unsigned char *)c1 = *(unsigned char *)subnam; - sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D'; - cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; - if (! (cname || sname)) { - return ret_val; - } - s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2); - s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3); - s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2); + if (work[j] > vcrit) { + rec = 1.f / vmax; + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.f; + vcrit = bignum; + } - switch (*ispec) { - case 1: goto L110; - case 2: goto L200; - case 3: goto L300; - } + i__3 = j - ki - 2; + work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + i__3 = j - ki - 2; + work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); -L110: +/* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */ -/* - ISPEC = 1: block size + r__1 = -wi; + slaln2_(&c_false, &c__1, &c__2, &smin, &c_b871, &t[j + + j * t_dim1], ldt, &c_b871, &c_b871, &work[j + + *n], n, &wr, &r__1, x, &c__2, &scale, & + xnorm, &ierr); - In these examples, separate code is provided for setting NB for - real and complex. We assume that NB will take the same value in - single or double precision. 
-*/ +/* Scale if necessary */ - nb = 1; + if (scale != 1.f) { + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; +/* Computing MAX */ + r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = ( + r__2 = work[j + n2], dabs(r__2)), r__3 = max( + r__3,r__4); + vmax = dmax(r__3,vmax); + vcrit = bignum / vmax; - if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } else if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, - "RQF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) - 3, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) - == 0) { - if (sname) { - nb = 32; - } else { - nb = 32; - } - } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 32; - } else { - nb = 32; - } - } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 32; - } else { - nb = 32; - } - } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (s_cmp(c2, "PO", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } - } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; - } - } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nb = 32; - } else if (sname && s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { - nb = 64; - } - } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - nb = 64; - } else if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nb = 32; - } else if (s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { - nb = 64; - } - } else if (sname && 
s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nb = 32; - } - } else if (*(unsigned char *)c3 == 'M') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nb = 32; - } - } - } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nb = 32; - } - } else if (*(unsigned char *)c3 == 'M') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nb = 32; - } - } - } else if (s_cmp(c2, "GB", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - if (*n4 <= 64) { - nb = 1; - } else { - nb = 32; + } else { + +/* + 2-by-2 diagonal block + + Scale if necessary to avoid overflow when forming + the right-hand 
side elements. + + Computing MAX +*/ + r__1 = work[j], r__2 = work[j + 1]; + beta = dmax(r__1,r__2); + if (beta > vcrit) { + rec = 1.f / vmax; + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + sscal_(&i__3, &rec, &work[ki + n2], &c__1); + vmax = 1.f; + vcrit = bignum; + } + + i__3 = j - ki - 2; + work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1], + &c__1, &work[ki + 2 + n2], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + *n], &c__1); + + i__3 = j - ki - 2; + work[j + 1 + n2] -= sdot_(&i__3, &t[ki + 2 + (j + 1) * + t_dim1], &c__1, &work[ki + 2 + n2], &c__1); + +/* + Solve 2-by-2 complex linear equation + ([T(j,j) T(j,j+1) ]'-(wr-i*wi)*I)*X = SCALE*B + ([T(j+1,j) T(j+1,j+1)] ) +*/ + + r__1 = -wi; + slaln2_(&c_true, &c__2, &c__2, &smin, &c_b871, &t[j + + j * t_dim1], ldt, &c_b871, &c_b871, &work[j + + *n], n, &wr, &r__1, x, &c__2, &scale, &xnorm, + &ierr); + +/* Scale if necessary */ + + if (scale != 1.f) { + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + *n], &c__1); + i__3 = *n - ki + 1; + sscal_(&i__3, &scale, &work[ki + n2], &c__1); + } + work[j + *n] = x[0]; + work[j + n2] = x[2]; + work[j + 1 + *n] = x[1]; + work[j + 1 + n2] = x[3]; +/* Computing MAX */ + r__1 = dabs(x[0]), r__2 = dabs(x[2]), r__1 = max(r__1, + r__2), r__2 = dabs(x[1]), r__1 = max(r__1, + r__2), r__2 = dabs(x[3]), r__1 = max(r__1, + r__2); + vmax = dmax(r__1,vmax); + vcrit = bignum / vmax; + + } +L200: + ; } - } else { - if (*n4 <= 64) { - nb = 1; + +/* + Copy the vector x or Q*x to VL and normalize. + + L210: +*/ + if (! 
over) { + i__2 = *n - ki + 1; + scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * + vl_dim1], &c__1); + i__2 = *n - ki + 1; + scopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) * + vl_dim1], &c__1); + + emax = 0.f; + i__2 = *n; + for (k = ki; k <= i__2; ++k) { +/* Computing MAX */ + r__3 = emax, r__4 = (r__1 = vl[k + is * vl_dim1], + dabs(r__1)) + (r__2 = vl[k + (is + 1) * + vl_dim1], dabs(r__2)); + emax = dmax(r__3,r__4); +/* L220: */ + } + remax = 1.f / emax; + i__2 = *n - ki + 1; + sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); + i__2 = *n - ki + 1; + sscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1) + ; + + i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + vl[k + is * vl_dim1] = 0.f; + vl[k + (is + 1) * vl_dim1] = 0.f; +/* L230: */ + } } else { - nb = 32; + if (ki < *n - 1) { + i__2 = *n - ki - 1; + sgemv_("N", n, &i__2, &c_b871, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + *n], &c__1, &work[ + ki + *n], &vl[ki * vl_dim1 + 1], &c__1); + i__2 = *n - ki - 1; + sgemv_("N", n, &i__2, &c_b871, &vl[(ki + 2) * vl_dim1 + + 1], ldvl, &work[ki + 2 + n2], &c__1, &work[ + ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + 1], & + c__1); + } else { + sscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], & + c__1); + sscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 + + 1], &c__1); + } + + emax = 0.f; + i__2 = *n; + for (k = 1; k <= i__2; ++k) { +/* Computing MAX */ + r__3 = emax, r__4 = (r__1 = vl[k + ki * vl_dim1], + dabs(r__1)) + (r__2 = vl[k + (ki + 1) * + vl_dim1], dabs(r__2)); + emax = dmax(r__3,r__4); +/* L240: */ + } + remax = 1.f / emax; + sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + sscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1); + } + } - } - } else if (s_cmp(c2, "PB", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - if (*n2 <= 64) { - nb = 1; - } else { - nb = 32; - } - } else { - if (*n2 <= 64) { - nb = 1; - } else { - nb = 32; - } + + ++is; + if (ip != 0) { + ++is; } - } - } else if 
(s_cmp(c2, "TR", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; +L250: + if (ip == -1) { + ip = 0; } - } - } else if (s_cmp(c2, "LA", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "UUM", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nb = 64; - } else { - nb = 64; + if (ip == 1) { + ip = -1; } + +/* L260: */ } - } else if (sname && s_cmp(c2, "ST", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "EBZ", (ftnlen)3, (ftnlen)3) == 0) { - nb = 1; - } + } - ret_val = nb; - return ret_val; -L200: + return 0; -/* ISPEC = 2: minimum block size */ +/* End of STREVC */ - nbmin = 2; - if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( - ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( - ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) - { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nbmin = 2; - } else { - nbmin = 2; - } - } - } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nbmin = 8; - } else { - nbmin = 8; - } - } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nbmin = 2; - } - } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nbmin = 2; - } - } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, 
(ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nbmin = 2; - } - } else if (*(unsigned char *)c3 == 'M') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nbmin = 2; - } - } - } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nbmin = 2; - } - } else if (*(unsigned char *)c3 == 'M') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nbmin = 2; - } - } +} /* strevc_ */ + +/* Subroutine */ int strti2_(char *uplo, char *diag, integer *n, real *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer j; + static real ajj; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); + static logical upper; + extern /* Subroutine */ int strmv_(char *, char *, char *, integer *, + real *, integer *, real *, integer *), + xerbla_(char *, 
integer *); + static logical nounit; + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + STRTI2 computes the inverse of a real upper or lower triangular + matrix. + + This is the Level 2 BLAS version of the algorithm. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular + + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. + = 'N': Non-unit triangular + = 'U': Unit triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading n by n upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + + On exit, the (triangular) inverse of the original matrix, in + the same storage format. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! 
lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("STRTI2", &i__1); + return 0; } - ret_val = nbmin; - return ret_val; -L300: + if (upper) { -/* ISPEC = 3: crossover point */ +/* Compute inverse of upper triangular matrix. */ - nx = 0; - if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( - ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( - ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) - { - if (sname) { - nx = 128; - } else { - nx = 128; - } - } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nx = 128; - } else { - nx = 128; - } - } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { - if (sname) { - nx = 128; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + a[j + j * a_dim1] = 1.f / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; } else { - nx = 128; + ajj = -1.f; } + +/* Compute elements 1:j-1 of j-th column. 
*/ + + i__2 = j - 1; + strmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + sscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ } - } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { - if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nx = 32; - } - } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { - if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { - nx = 32; - } - } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nx = 128; + } else { + +/* Compute inverse of lower triangular matrix. */ + + for (j = *n; j >= 1; --j) { + if (nounit) { + a[j + j * a_dim1] = 1.f / a[j + j * a_dim1]; + ajj = -a[j + j * a_dim1]; + } else { + ajj = -1.f; } - } - } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { - if (*(unsigned char *)c3 == 'G') { - if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", - (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( - ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == - 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( - c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( - ftnlen)2, (ftnlen)2) == 0) { - nx = 128; + if (j < *n) { + +/* Compute elements j+1:n of j-th column. 
*/ + + i__1 = *n - j; + strmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); + i__1 = *n - j; + sscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); } +/* L20: */ } } - ret_val = nx; - return ret_val; -L400: + return 0; -/* ISPEC = 4: number of shifts (used by xHSEQR) */ +/* End of STRTI2 */ - ret_val = 6; - return ret_val; +} /* strti2_ */ -L500: +/* Subroutine */ int strtri_(char *uplo, char *diag, integer *n, real *a, + integer *lda, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; + char ch__1[2]; -/* ISPEC = 5: minimum column dimension (not used) */ + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); - ret_val = 2; - return ret_val; + /* Local variables */ + static integer j, jb, nb, nn; + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int strmm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), strsm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, + integer *), strti2_(char *, char * + , integer *, real *, integer *, integer *), + xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical nounit; -L600: -/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 - ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); - return ret_val; -L700: + Purpose + ======= -/* ISPEC = 7: number of processors (not used) */ + STRTRI computes the inverse of a real upper or lower triangular + matrix A. 
- ret_val = 1; - return ret_val; + This is the Level 3 BLAS version of the algorithm. -L800: + Arguments + ========= -/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ + UPLO (input) CHARACTER*1 + = 'U': A is upper triangular; + = 'L': A is lower triangular. - ret_val = 50; - return ret_val; + DIAG (input) CHARACTER*1 + = 'N': A is non-unit triangular; + = 'U': A is unit triangular. -L900: + N (input) INTEGER + The order of the matrix A. N >= 0. -/* - ISPEC = 9: maximum size of the subproblems at the bottom of the - computation tree in the divide-and-conquer algorithm - (used by xGELSD and xGESDD) -*/ + A (input/output) REAL array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + On exit, the (triangular) inverse of the original matrix, in + the same storage format. - ret_val = 25; - return ret_val; + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). -L1000: + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, A(i,i) is exactly zero. The triangular + matrix is singular and its inverse can not be computed. -/* - ISPEC = 10: ieee NaN arithmetic can be trusted not to trap + ===================================================================== - ILAENV = 0 + + Test the input parameters. 
*/ - ret_val = 1; - if (ret_val == 1) { - ret_val = ieeeck_(&c__0, &c_b3825, &c_b3826); + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("STRTRI", &i__1); + return 0; } - return ret_val; -L1100: +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Check for singularity if non-unit. */ + + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + if (a[*info + *info * a_dim1] == 0.f) { + return 0; + } +/* L10: */ + } + *info = 0; + } /* - ISPEC = 11: infinity arithmetic can be trusted not to trap + Determine the block size for this environment. - ILAENV = 0 + Writing concatenation */ - ret_val = 1; - if (ret_val == 1) { - ret_val = ieeeck_(&c__1, &c_b3825, &c_b3826); + i__2[0] = 1, a__1[0] = uplo; + i__2[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "STRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code */ + + strti2_(uplo, diag, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code */ + + if (upper) { + +/* Compute inverse of upper triangular matrix */ + + i__1 = *n; + i__3 = nb; + for (j = 1; i__3 < 0 ? 
j >= i__1 : j <= i__1; j += i__3) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); + +/* Compute rows 1:j-1 of current block column */ + + i__4 = j - 1; + strmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & + c_b871, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + i__4 = j - 1; + strsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & + c_b1150, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda); + +/* Compute inverse of current diagonal block */ + + strti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L20: */ + } + } else { + +/* Compute inverse of lower triangular matrix */ + + nn = (*n - 1) / nb * nb + 1; + i__3 = -nb; + for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { + +/* Compute rows j+jb:n of current block column */ + + i__1 = *n - j - jb + 1; + strmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, + &c_b871, &a[j + jb + (j + jb) * a_dim1], lda, &a[ + j + jb + j * a_dim1], lda); + i__1 = *n - j - jb + 1; + strsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, + &c_b1150, &a[j + j * a_dim1], lda, &a[j + jb + j + * a_dim1], lda); + } + +/* Compute inverse of current diagonal block */ + + strti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L30: */ + } + } } - return ret_val; -/* End of ILAENV */ + return 0; -} /* ilaenv_ */ +/* End of STRTRI */ + +} /* strtri_ */ diff --git a/numpy/linalg/lapack_lite/wrapped_routines b/numpy/linalg/lapack_lite/wrapped_routines index cc279f083..47a73508d 100644 --- a/numpy/linalg/lapack_lite/wrapped_routines +++ b/numpy/linalg/lapack_lite/wrapped_routines @@ -1,19 +1,49 @@ +ccopy +cgeev +cgemm +cgesdd +cgesv +cgetrf +cheevd +cpotrf +cpotri +cpotrs +dcopy dgeev dgelsd +dgemm dgeqrf dgesdd dgesv dgetrf +dorgqr dpotrf +dpotri +dpotrs dsyevd +scopy +sgeev +sgemm +sgesdd +sgesv +sgetrf +spotrf +spotri +spotrs +ssyevd +zcopy zgeev zgelsd +zgemm zgeqrf zgesdd 
zgesv zgetrf zheevd zpotrf +zpotri +zpotrs +zungqr # need this b/c it's not properly declared as external in the BLAS source dcabs1 IGNORE: dlamch diff --git a/numpy/linalg/lapack_lite/zlapack_lite.c b/numpy/linalg/lapack_lite/zlapack_lite.c index 0df4bdcda..7dcd92cc5 100644 --- a/numpy/linalg/lapack_lite/zlapack_lite.c +++ b/numpy/linalg/lapack_lite/zlapack_lite.c @@ -20485,20 +20485,17 @@ L210: } /* zlatrs_ */ -/* Subroutine */ int zpotf2_(char *uplo, integer *n, doublecomplex *a, +/* Subroutine */ int zlauu2_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; doublereal d__1; - doublecomplex z__1, z__2; - - /* Builtin functions */ - double sqrt(doublereal); + doublecomplex z__1; /* Local variables */ - static integer j; - static doublereal ajj; + static integer i__; + static doublereal aii; extern logical lsame_(char *, char *); extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); @@ -20512,7 +20509,7 @@ L210: /* - -- LAPACK routine (version 3.0) -- + -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 @@ -20521,39 +20518,35 @@ L210: Purpose ======= - ZPOTF2 computes the Cholesky factorization of a complex Hermitian - positive definite matrix A. + ZLAUU2 computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. - The factorization has the form - A = U' * U , if UPLO = 'U', or - A = L * L', if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. 
- This is the unblocked version of the algorithm, calling Level 2 BLAS. + This is the unblocked form of the algorithm, calling Level 2 BLAS. Arguments ========= UPLO (input) CHARACTER*1 - Specifies whether the upper or lower triangular part of the - Hermitian matrix A is stored. + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: = 'U': Upper triangular = 'L': Lower triangular N (input) INTEGER - The order of the matrix A. N >= 0. + The order of the triangular factor U or L. N >= 0. A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - n by n upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading n by n lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U'*U or A = L*L'. + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). @@ -20561,9 +20554,6 @@ L210: INFO (output) INTEGER = 0: successful exit < 0: if INFO = -k, the k-th argument had an illegal value - > 0: if INFO = k, the leading minor of order k is not - positive definite, and the factorization could not be - completed. ===================================================================== @@ -20588,7 +20578,7 @@ L210: } if (*info != 0) { i__1 = -(*info); - xerbla_("ZPOTF2", &i__1); + xerbla_("ZLAUU2", &i__1); return 0; } @@ -20600,113 +20590,81 @@ L210: if (upper) { -/* Compute the Cholesky factorization A = U'*U. 
*/ +/* Compute the product U * U'. */ i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute U(J,J) and test for non-positive-definiteness. */ - - i__2 = j + j * a_dim1; - d__1 = a[i__2].r; - i__3 = j - 1; - zdotc_(&z__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1] - , &c__1); - z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; - ajj = z__1.r; - if (ajj <= 0.) { - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.; - goto L30; - } - ajj = sqrt(ajj); - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.; - -/* Compute elements J+1:N of row J. */ - - if (j < *n) { - i__2 = j - 1; - zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); - i__2 = j - 1; - i__3 = *n - j; - z__1.r = -1., z__1.i = -0.; - zgemv_("Transpose", &i__2, &i__3, &z__1, &a[(j + 1) * a_dim1 - + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b60, &a[j + ( - j + 1) * a_dim1], lda); - i__2 = j - 1; - zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); - i__2 = *n - j; - d__1 = 1. / ajj; - zdscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + zdotc_(&z__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &a[ + i__ + (i__ + 1) * a_dim1], lda); + d__1 = aii * aii + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + z__1.r = aii, z__1.i = 0.; + zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[(i__ + 1) * + a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & + z__1, &a[i__ * a_dim1 + 1], &c__1); + i__2 = *n - i__; + zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); + } else { + zdscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); } /* L10: */ } + } else { -/* Compute the Cholesky factorization A = L*L'. */ +/* Compute the product L' * L. */ i__1 = *n; - for (j = 1; j <= i__1; ++j) { - -/* Compute L(J,J) and test for non-positive-definiteness. 
*/ - - i__2 = j + j * a_dim1; - d__1 = a[i__2].r; - i__3 = j - 1; - zdotc_(&z__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); - z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; - ajj = z__1.r; - if (ajj <= 0.) { - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.; - goto L30; - } - ajj = sqrt(ajj); - i__2 = j + j * a_dim1; - a[i__2].r = ajj, a[i__2].i = 0.; - -/* Compute elements J+1:N of column J. */ - - if (j < *n) { - i__2 = j - 1; - zlacgv_(&i__2, &a[j + a_dim1], lda); - i__2 = *n - j; - i__3 = j - 1; - z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &a[j + 1 + a_dim1] - , lda, &a[j + a_dim1], lda, &c_b60, &a[j + 1 + j * - a_dim1], &c__1); - i__2 = j - 1; - zlacgv_(&i__2, &a[j + a_dim1], lda); - i__2 = *n - j; - d__1 = 1. / ajj; - zdscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + i__ * a_dim1; + aii = a[i__2].r; + if (i__ < *n) { + i__2 = i__ + i__ * a_dim1; + i__3 = *n - i__; + zdotc_(&z__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[ + i__ + 1 + i__ * a_dim1], &c__1); + d__1 = aii * aii + z__1.r; + a[i__2].r = d__1, a[i__2].i = 0.; + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + i__2 = *n - i__; + i__3 = i__ - 1; + z__1.r = aii, z__1.i = 0.; + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & + z__1, &a[i__ + a_dim1], lda); + i__2 = i__ - 1; + zlacgv_(&i__2, &a[i__ + a_dim1], lda); + } else { + zdscal_(&i__, &aii, &a[i__ + a_dim1], lda); } /* L20: */ } } - goto L40; - -L30: - *info = j; -L40: return 0; -/* End of ZPOTF2 */ +/* End of ZLAUU2 */ -} /* zpotf2_ */ +} /* zlauu2_ */ -/* Subroutine */ int zpotrf_(char *uplo, integer *n, doublecomplex *a, +/* Subroutine */ int zlauum_(char *uplo, integer *n, doublecomplex *a, integer *lda, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublecomplex z__1; /* Local variables */ - static integer j, 
jb, nb; + static integer i__, ib, nb; extern logical lsame_(char *, char *); extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, @@ -20715,16 +20673,16 @@ L40: integer *, doublereal *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *); static logical upper; - extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, + extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *), - zpotf2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); + zlauu2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); /* - -- LAPACK routine (version 3.0) -- + -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 @@ -20733,47 +20691,42 @@ L40: Purpose ======= - ZPOTRF computes the Cholesky factorization of a complex Hermitian - positive definite matrix A. + ZLAUUM computes the product U * U' or L' * L, where the triangular + factor U or L is stored in the upper or lower triangular part of + the array A. - The factorization has the form - A = U**H * U, if UPLO = 'U', or - A = L * L**H, if UPLO = 'L', - where U is an upper triangular matrix and L is lower triangular. + If UPLO = 'U' or 'u' then the upper triangle of the result is stored, + overwriting the factor U in A. + If UPLO = 'L' or 'l' then the lower triangle of the result is stored, + overwriting the factor L in A. - This is the block version of the algorithm, calling Level 3 BLAS. + This is the blocked form of the algorithm, calling Level 3 BLAS. 
Arguments ========= UPLO (input) CHARACTER*1 - = 'U': Upper triangle of A is stored; - = 'L': Lower triangle of A is stored. + Specifies whether the triangular factor stored in the array A + is upper or lower triangular: + = 'U': Upper triangular + = 'L': Lower triangular N (input) INTEGER - The order of the matrix A. N >= 0. + The order of the triangular factor U or L. N >= 0. A (input/output) COMPLEX*16 array, dimension (LDA,N) - On entry, the Hermitian matrix A. If UPLO = 'U', the leading - N-by-N upper triangular part of A contains the upper - triangular part of the matrix A, and the strictly lower - triangular part of A is not referenced. If UPLO = 'L', the - leading N-by-N lower triangular part of A contains the lower - triangular part of the matrix A, and the strictly upper - triangular part of A is not referenced. - - On exit, if INFO = 0, the factor U or L from the Cholesky - factorization A = U**H*U or A = L*L**H. + On entry, the triangular factor U or L. + On exit, if UPLO = 'U', the upper triangle of A is + overwritten with the upper triangle of the product U * U'; + if UPLO = 'L', the lower triangle of A is overwritten with + the lower triangle of the product L' * L. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the leading minor of order i is not - positive definite, and the factorization could not be - completed. + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value ===================================================================== @@ -20798,7 +20751,7 @@ L40: } if (*info != 0) { i__1 = -(*info); - xerbla_("ZPOTRF", &i__1); + xerbla_("ZLAUUM", &i__1); return 0; } @@ -20810,285 +20763,164 @@ L40: /* Determine the block size for this environment. 
*/ - nb = ilaenv_(&c__1, "ZPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + nb = ilaenv_(&c__1, "ZLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); + if (nb <= 1 || nb >= *n) { -/* Use unblocked code. */ +/* Use unblocked code */ - zpotf2_(uplo, n, &a[a_offset], lda, info); + zlauu2_(uplo, n, &a[a_offset], lda, info); } else { -/* Use blocked code. */ +/* Use blocked code */ if (upper) { -/* Compute the Cholesky factorization A = U'*U. */ +/* Compute the product U * U'. */ i__1 = *n; i__2 = nb; - for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { - -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. - - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - zherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1294, & - a[j * a_dim1 + 1], lda, &c_b1015, &a[j + j * a_dim1], - lda); - zpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { - -/* Compute the current block row. */ - - i__3 = *n - j - jb + 1; - i__4 = j - 1; - z__1.r = -1., z__1.i = -0.; - zgemm_("Conjugate transpose", "No transpose", &jb, &i__3, - &i__4, &z__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) - * a_dim1 + 1], lda, &c_b60, &a[j + (j + jb) * - a_dim1], lda); - i__3 = *n - j - jb + 1; - ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", - &jb, &i__3, &c_b60, &a[j + j * a_dim1], lda, &a[ - j + (j + jb) * a_dim1], lda); + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + ztrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", & + i__3, &ib, &c_b60, &a[i__ + i__ * a_dim1], lda, &a[ + i__ * a_dim1 + 1], lda); + zlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + zgemm_("No transpose", "Conjugate transpose", &i__3, &ib, + &i__4, &c_b60, &a[(i__ + ib) * a_dim1 + 1], lda, & + a[i__ + (i__ + ib) * a_dim1], lda, &c_b60, &a[i__ + * a_dim1 + 1], lda); + i__3 = *n - i__ - ib + 1; + zherk_("Upper", "No transpose", &ib, &i__3, &c_b1015, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b1015, &a[i__ + + i__ * a_dim1], lda); } /* L10: */ } - } else { -/* Compute the Cholesky factorization A = L*L'. */ +/* Compute the product L' * L. */ i__2 = *n; i__1 = nb; - for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { +/* Computing MIN */ + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3,i__4); + i__3 = i__ - 1; + ztrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", & + ib, &i__3, &c_b60, &a[i__ + i__ * a_dim1], lda, &a[ + i__ + a_dim1], lda); + zlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + zgemm_("Conjugate transpose", "No transpose", &ib, &i__3, + &i__4, &c_b60, &a[i__ + ib + i__ * a_dim1], lda, & + a[i__ + ib + a_dim1], lda, &c_b60, &a[i__ + + a_dim1], lda); + i__3 = *n - i__ - ib + 1; + zherk_("Lower", "Conjugate transpose", &ib, &i__3, & + c_b1015, &a[i__ + ib + i__ * a_dim1], lda, & + c_b1015, &a[i__ + i__ * a_dim1], lda); + } +/* L20: */ + } + } + } -/* - Update and factorize the current diagonal block and test - for non-positive-definiteness. 
+ return 0; - Computing MIN -*/ - i__3 = nb, i__4 = *n - j + 1; - jb = min(i__3,i__4); - i__3 = j - 1; - zherk_("Lower", "No transpose", &jb, &i__3, &c_b1294, &a[j + - a_dim1], lda, &c_b1015, &a[j + j * a_dim1], lda); - zpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); - if (*info != 0) { - goto L30; - } - if (j + jb <= *n) { +/* End of ZLAUUM */ -/* Compute the current block column. */ +} /* zlauum_ */ - i__3 = *n - j - jb + 1; - i__4 = j - 1; - z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "Conjugate transpose", &i__3, &jb, - &i__4, &z__1, &a[j + jb + a_dim1], lda, &a[j + - a_dim1], lda, &c_b60, &a[j + jb + j * a_dim1], - lda); - i__3 = *n - j - jb + 1; - ztrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" - , &i__3, &jb, &c_b60, &a[j + j * a_dim1], lda, &a[ - j + jb + j * a_dim1], lda); - } -/* L20: */ - } - } - } - goto L40; - -L30: - *info = *info + j - 1; - -L40: - return 0; - -/* End of ZPOTRF */ - -} /* zpotrf_ */ - -/* Subroutine */ int zstedc_(char *compz, integer *n, doublereal *d__, - doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work, - integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, - integer *liwork, integer *info) +/* Subroutine */ int zpotf2_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info) { /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2, i__3, i__4; - doublereal d__1, d__2; + integer a_dim1, a_offset, i__1, i__2, i__3; + doublereal d__1; + doublecomplex z__1, z__2; /* Builtin functions */ - double log(doublereal); - integer pow_ii(integer *, integer *); double sqrt(doublereal); /* Local variables */ - static integer i__, j, k, m; - static doublereal p; - static integer ii, ll, end, lgn; - static doublereal eps, tiny; + static integer j; + static doublereal ajj; extern logical lsame_(char *, char *); - static integer lwmin, start; - extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *), 
zlaed0_(integer *, integer *, - doublereal *, doublereal *, doublecomplex *, integer *, - doublecomplex *, integer *, doublereal *, integer *, integer *); - - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *), dstedc_(char *, integer *, - doublereal *, doublereal *, doublereal *, integer *, doublereal *, - integer *, integer *, integer *, integer *), dlaset_( - char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *), xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, - integer *), zlacrm_(integer *, integer *, doublecomplex *, - integer *, doublereal *, integer *, doublecomplex *, integer *, - doublereal *); - static integer liwmin, icompz; - extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, - doublereal *, doublereal *, integer *, doublereal *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, + extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( + integer *, doublereal *, doublecomplex *, integer *), zlacgv_( integer *, doublecomplex *, integer *); - static doublereal orgnrm; - static integer lrwmin; - static logical lquery; - static integer smlsiz; - extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, - doublereal *, doublecomplex *, integer *, doublereal *, integer *); /* -- LAPACK 
routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + September 30, 1994 Purpose ======= - ZSTEDC computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the divide and conquer method. - The eigenvectors of a full or band complex Hermitian matrix can also - be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this - matrix to tridiagonal form. + ZPOTF2 computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. - This code makes very mild assumptions about floating point - arithmetic. It will work on machines with a guard digit in - add/subtract, or on those binary machines without guard digits - which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. - It could conceivably fail on hexadecimal or decimal machines - without guard digits, but we know of none. See DLAED3 for details. + The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the unblocked version of the algorithm, calling Level 2 BLAS. Arguments ========= - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. - = 'I': Compute eigenvectors of tridiagonal matrix also. - = 'V': Compute eigenvectors of original Hermitian matrix - also. On entry, Z contains the unitary matrix used - to reduce the original matrix to tridiagonal form. + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + Hermitian matrix A is stored. + = 'U': Upper triangular + = 'L': Lower triangular N (input) INTEGER - The dimension of the symmetric tridiagonal matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. 
- - E (input/output) DOUBLE PRECISION array, dimension (N-1) - On entry, the subdiagonal elements of the tridiagonal matrix. - On exit, E has been destroyed. - - Z (input/output) COMPLEX*16 array, dimension (LDZ,N) - On entry, if COMPZ = 'V', then Z contains the unitary - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original Hermitian matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. - - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1. - If eigenvectors are desired, then LDZ >= max(1,N). - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. - If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. - If COMPZ = 'V' and N > 1, LWORK must be at least N*N. - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - RWORK (workspace/output) DOUBLE PRECISION array, - dimension (LRWORK) - On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. - - LRWORK (input) INTEGER - The dimension of the array RWORK. - If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. - If COMPZ = 'V' and N > 1, LRWORK must be at least - 1 + 3*N + 2*N*lg N + 3*N**2 , - where lg( N ) = smallest integer k such - that 2**k >= N. - If COMPZ = 'I' and N > 1, LRWORK must be at least - 1 + 4*N + 2*N**2 . - - If LRWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the RWORK array, - returns this value as the first entry of the RWORK array, and - no error message related to LRWORK is issued by XERBLA. 
+ The order of the matrix A. N >= 0. - IWORK (workspace/output) INTEGER array, dimension (LIWORK) - On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. - LIWORK (input) INTEGER - The dimension of the array IWORK. - If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. - If COMPZ = 'V' or N > 1, LIWORK must be at least - 6 + 6*N + 5*N*lg N. - If COMPZ = 'I' or N > 1, LIWORK must be at least - 3 + 5*N . + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. - If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). INFO (output) INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while - working on the submatrix lying in rows and columns - INFO/(N+1) through mod(INFO,N+1). - - Further Details - =============== - - Based on contributions by - Jeff Rutter, Computer Science Division, University of California - at Berkeley, USA + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. 
===================================================================== @@ -21097,403 +20929,208 @@ L40: */ /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; - --rwork; - --iwork; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; /* Function Body */ *info = 0; - lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZPOTF2", &i__1); + return 0; + } - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if (*n <= 1 || icompz <= 0) { - lwmin = 1; - liwmin = 1; - lrwmin = 1; - } else { - lgn = (integer) (log((doublereal) (*n)) / log(2.)); - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (icompz == 1) { - lwmin = *n * *n; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; - liwmin = *n * 6 + 6 + *n * 5 * lgn; - } else if (icompz == 2) { - lwmin = 1; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = (*n << 2) + 1 + (i__1 * i__1 << 1); - liwmin = *n * 5 + 3; - } - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { - *info = -6; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*lrwork < lrwmin && ! lquery) { - *info = -10; - } else if (*liwork < liwmin && ! 
lquery) { - *info = -12; +/* Quick return if possible */ + + if (*n == 0) { + return 0; } - if (*info == 0) { - work[1].r = (doublereal) lwmin, work[1].i = 0.; - rwork[1] = (doublereal) lrwmin; - iwork[1] = liwmin; - } - - if (*info != 0) { - i__1 = -(*info); - xerbla_("ZSTEDC", &i__1); - return 0; - } else if (lquery) { - return 0; - } - -/* Quick return if possible */ - - if (*n == 0) { - return 0; - } - if (*n == 1) { - if (icompz != 0) { - i__1 = z_dim1 + 1; - z__[i__1].r = 1., z__[i__1].i = 0.; - } - return 0; - } + if (upper) { - smlsiz = ilaenv_(&c__9, "ZSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); +/* Compute the Cholesky factorization A = U'*U. */ -/* - If the following conditional clause is removed, then the routine - will use the Divide and Conquer routine to compute only the - eigenvalues, which requires (3N + 3N**2) real workspace and - (2 + 5N + 2N lg(N)) integer workspace. - Since on many architectures DSTERF is much faster than any other - algorithm for finding eigenvalues only, it is used here - as the default. + i__1 = *n; + for (j = 1; j <= i__1; ++j) { - If COMPZ = 'N', use DSTERF to compute the eigenvalues. -*/ +/* Compute U(J,J) and test for non-positive-definiteness. */ - if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); - return 0; - } + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + i__3 = j - 1; + zdotc_(&z__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1] + , &c__1); + z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; + ajj = z__1.r; + if (ajj <= 0.) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; -/* - If N is smaller than the minimum divide size (SMLSIZ+1), then - solve the problem with another solver. -*/ +/* Compute elements J+1:N of row J. 
*/ - if (*n <= smlsiz) { - if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); - return 0; - } else if (icompz == 2) { - zsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; - } else { - zsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; + if (j < *n) { + i__2 = j - 1; + zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + i__3 = *n - j; + z__1.r = -1., z__1.i = -0.; + zgemv_("Transpose", &i__2, &i__3, &z__1, &a[(j + 1) * a_dim1 + + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b60, &a[j + ( + j + 1) * a_dim1], lda); + i__2 = j - 1; + zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); + i__2 = *n - j; + d__1 = 1. / ajj; + zdscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ } - } + } else { -/* If COMPZ = 'I', we simply call DSTEDC instead. */ +/* Compute the Cholesky factorization A = L*L'. */ - if (icompz == 2) { - dlaset_("Full", n, n, &c_b324, &c_b1015, &rwork[1], n); - ll = *n * *n + 1; - i__1 = *lrwork - ll + 1; - dstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & - iwork[1], liwork, info); i__1 = *n; for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * z_dim1; - i__4 = (j - 1) * *n + i__; - z__[i__3].r = rwork[i__4], z__[i__3].i = 0.; -/* L10: */ + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + i__2 = j + j * a_dim1; + d__1 = a[i__2].r; + i__3 = j - 1; + zdotc_(&z__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); + z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; + ajj = z__1.r; + if (ajj <= 0.) { + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + goto L30; + } + ajj = sqrt(ajj); + i__2 = j + j * a_dim1; + a[i__2].r = ajj, a[i__2].i = 0.; + +/* Compute elements J+1:N of column J. 
*/ + + if (j < *n) { + i__2 = j - 1; + zlacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + i__3 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemv_("No transpose", &i__2, &i__3, &z__1, &a[j + 1 + a_dim1] + , lda, &a[j + a_dim1], lda, &c_b60, &a[j + 1 + j * + a_dim1], &c__1); + i__2 = j - 1; + zlacgv_(&i__2, &a[j + a_dim1], lda); + i__2 = *n - j; + d__1 = 1. / ajj; + zdscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); } /* L20: */ } - return 0; } + goto L40; -/* - From now on, only option left to be handled is COMPZ = 'V', - i.e. ICOMPZ = 1. +L30: + *info = j; - Scale. -*/ +L40: + return 0; - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { - return 0; - } +/* End of ZPOTF2 */ - eps = EPSILON; +} /* zpotf2_ */ - start = 1; +/* Subroutine */ int zpotrf_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + doublecomplex z__1; -/* while ( START <= N ) */ + /* Local variables */ + static integer j, jb, nb; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zherk_(char *, char *, integer *, + integer *, doublereal *, doublecomplex *, integer *, doublereal *, + doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + zpotf2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); -L30: - if (start <= *n) { /* - Let END be the position of the next subdiagonal entry such that - E( END ) <= TINY or END = N if no such subdiagonal exists. 
The - matrix identified by the elements between START and END - constitutes an independent sub-problem. -*/ + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 - end = start; -L40: - if (end < *n) { - tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = - d__[end + 1], abs(d__2))); - if ((d__1 = e[end], abs(d__1)) > tiny) { - ++end; - goto L40; - } - } -/* (Sub) Problem determined. Compute its size and solve it. */ + Purpose + ======= - m = end - start + 1; - if (m > smlsiz) { - *info = smlsiz; + ZPOTRF computes the Cholesky factorization of a complex Hermitian + positive definite matrix A. -/* Scale. */ + The factorization has the form + A = U**H * U, if UPLO = 'U', or + A = L * L**H, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. - orgnrm = dlanst_("M", &m, &d__[start], &e[start]); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &m, &c__1, &d__[ - start], &m, info); - i__1 = m - 1; - i__2 = m - 1; - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &i__1, &c__1, &e[ - start], &i__2, info); - - zlaed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1], - ldz, &work[1], n, &rwork[1], &iwork[1], info); - if (*info > 0) { - *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m - + 1) + start - 1; - return 0; - } - -/* Scale back. 
*/ - - dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, &m, &c__1, &d__[ - start], &m, info); - - } else { - dsteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m * - m + 1], info); - zlacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & - work[1], n, &rwork[m * m + 1]); - zlacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz); - if (*info > 0) { - *info = start * (*n + 1) + end; - return 0; - } - } - - start = end + 1; - goto L30; - } - -/* - endwhile - - If the problem split any number of times, then the eigenvalues - will not be properly ordered. Here we permute the eigenvalues - (and the associated eigenvectors) into ascending order. -*/ - - if (m != *n) { - -/* Use Selection Sort to minimize swaps of eigenvectors */ - - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L50: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } -/* L60: */ - } - } - - work[1].r = (doublereal) lwmin, work[1].i = 0.; - rwork[1] = (doublereal) lrwmin; - iwork[1] = liwmin; - - return 0; - -/* End of ZSTEDC */ - -} /* zstedc_ */ - -/* Subroutine */ int zsteqr_(char *compz, integer *n, doublereal *d__, - doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, - integer *info) -{ - /* System generated locals */ - integer z_dim1, z_offset, i__1, i__2; - doublereal d__1, d__2; - - /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); - - /* Local variables */ - static doublereal b, c__, f, g; - static integer i__, j, k, l, m; - static doublereal p, r__, s; - static integer l1, ii, mm, lm1, mm1, nm1; - static doublereal rt1, rt2, eps; - static integer lsv; - static doublereal tst, eps2; - static integer lend, jtot; - extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal - *, 
doublereal *, doublereal *); - extern logical lsame_(char *, char *); - static doublereal anorm; - extern /* Subroutine */ int zlasr_(char *, char *, char *, integer *, - integer *, doublereal *, doublereal *, doublecomplex *, integer *), zswap_(integer *, doublecomplex *, - integer *, doublecomplex *, integer *), dlaev2_(doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *); - static integer lendm1, lendp1; - - static integer iscale; - extern /* Subroutine */ int dlascl_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, integer *, doublereal *, - integer *, integer *); - static doublereal safmin; - extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *); - static doublereal safmax; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); - extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, - integer *); - static integer lendsv; - static doublereal ssfmin; - static integer nmaxit, icompz; - static doublereal ssfmax; - extern /* Subroutine */ int zlaset_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, doublecomplex *, integer *); - - -/* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZSTEQR computes all eigenvalues and, optionally, eigenvectors of a - symmetric tridiagonal matrix using the implicit QL or QR method. - The eigenvectors of a full or band complex Hermitian matrix can also - be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this - matrix to tridiagonal form. + This is the block version of the algorithm, calling Level 3 BLAS. Arguments ========= - COMPZ (input) CHARACTER*1 - = 'N': Compute eigenvalues only. 
- = 'V': Compute eigenvalues and eigenvectors of the original - Hermitian matrix. On entry, Z must contain the - unitary matrix used to reduce the original matrix - to tridiagonal form. - = 'I': Compute eigenvalues and eigenvectors of the - tridiagonal matrix. Z is initialized to the identity - matrix. + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. N (input) INTEGER - The order of the matrix. N >= 0. - - D (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the diagonal elements of the tridiagonal matrix. - On exit, if INFO = 0, the eigenvalues in ascending order. - - E (input/output) DOUBLE PRECISION array, dimension (N-1) - On entry, the (n-1) subdiagonal elements of the tridiagonal - matrix. - On exit, E has been destroyed. + The order of the matrix A. N >= 0. - Z (input/output) COMPLEX*16 array, dimension (LDZ, N) - On entry, if COMPZ = 'V', then Z contains the unitary - matrix used in the reduction to tridiagonal form. - On exit, if INFO = 0, then if COMPZ = 'V', Z contains the - orthonormal eigenvectors of the original Hermitian matrix, - and if COMPZ = 'I', Z contains the orthonormal eigenvectors - of the symmetric tridiagonal matrix. - If COMPZ = 'N', then Z is not referenced. + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the Hermitian matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. - LDZ (input) INTEGER - The leading dimension of the array Z. LDZ >= 1, and if - eigenvectors are desired, then LDZ >= max(1,N). + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**H*U or A = L*L**H. 
- WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) - If COMPZ = 'N', then WORK is not referenced. + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value - > 0: the algorithm has failed to find all the eigenvalues in - a total of 30*N iterations; if INFO = i, then i - elements of E have not converged to zero; on exit, D - and E contain the elements of a symmetric tridiagonal - matrix which is unitarily similar to the original - matrix. + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. ===================================================================== @@ -21502,35 +21139,23 @@ L40: */ /* Parameter adjustments */ - --d__; - --e; - z_dim1 = *ldz; - z_offset = 1 + z_dim1; - z__ -= z_offset; - --work; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; /* Function Body */ *info = 0; - - if (lsame_(compz, "N")) { - icompz = 0; - } else if (lsame_(compz, "V")) { - icompz = 1; - } else if (lsame_(compz, "I")) { - icompz = 2; - } else { - icompz = -1; - } - if (icompz < 0) { + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; - } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { - *info = -6; + } else if (*lda < max(1,*n)) { + *info = -4; } if (*info != 0) { i__1 = -(*info); - xerbla_("ZSTEQR", &i__1); + xerbla_("ZPOTRF", &i__1); return 0; } @@ -21540,951 +21165,2307 @@ L40: return 0; } - if (*n == 1) { - if (icompz == 2) { - i__1 = z_dim1 + 1; - z__[i__1].r = 1., z__[i__1].i = 0.; - } - return 0; - } +/* Determine the block size for this environment. */ -/* Determine the unit roundoff and over/underflow thresholds. 
*/ + nb = ilaenv_(&c__1, "ZPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { - eps = EPSILON; -/* Computing 2nd power */ - d__1 = eps; - eps2 = d__1 * d__1; - safmin = SAFEMINIMUM; - safmax = 1. / safmin; - ssfmax = sqrt(safmax) / 3.; - ssfmin = sqrt(safmin) / eps2; +/* Use unblocked code. */ -/* - Compute the eigenvalues and eigenvectors of the tridiagonal - matrix. -*/ + zpotf2_(uplo, n, &a[a_offset], lda, info); + } else { - if (icompz == 2) { - zlaset_("Full", n, n, &c_b59, &c_b60, &z__[z_offset], ldz); - } +/* Use blocked code. */ - nmaxit = *n * 30; - jtot = 0; + if (upper) { -/* - Determine where the matrix splits and choose QL or QR iteration - for each block, according to whether top or bottom diagonal - element is smaller. -*/ +/* Compute the Cholesky factorization A = U'*U. */ - l1 = 1; - nm1 = *n - 1; + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { -L10: - if (l1 > *n) { - goto L160; - } - if (l1 > 1) { - e[l1 - 1] = 0.; - } - if (l1 <= nm1) { - i__1 = nm1; - for (m = l1; m <= i__1; ++m) { - tst = (d__1 = e[m], abs(d__1)); - if (tst == 0.) { - goto L30; - } - if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m - + 1], abs(d__2))) * eps) { - e[m] = 0.; - goto L30; - } -/* L20: */ - } - } - m = *n; +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. -L30: - l = l1; - lsv = l; - lend = m; - lendsv = lend; - l1 = m + 1; - if (lend == l) { - goto L10; - } + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + zherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1294, & + a[j * a_dim1 + 1], lda, &c_b1015, &a[j + j * a_dim1], + lda); + zpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { -/* Scale submatrix in rows and columns L to LEND */ +/* Compute the current block row. 
*/ - i__1 = lend - l + 1; - anorm = dlanst_("I", &i__1, &d__[l], &e[l]); - iscale = 0; - if (anorm == 0.) { - goto L10; - } - if (anorm > ssfmax) { - iscale = 1; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, - info); - } else if (anorm < ssfmin) { - iscale = 2; - i__1 = lend - l + 1; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, - info); - i__1 = lend - l; - dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, - info); - } + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("Conjugate transpose", "No transpose", &jb, &i__3, + &i__4, &z__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) + * a_dim1 + 1], lda, &c_b60, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", + &jb, &i__3, &c_b60, &a[j + j * a_dim1], lda, &a[ + j + (j + jb) * a_dim1], lda); + } +/* L10: */ + } -/* Choose between QL and QR iteration */ + } else { - if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { - lend = lsv; - l = lendsv; - } +/* Compute the Cholesky factorization A = L*L'. */ - if (lend > l) { + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* - QL Iteration + Update and factorize the current diagonal block and test + for non-positive-definiteness. - Look for small subdiagonal element. 
+ Computing MIN */ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + zherk_("Lower", "No transpose", &jb, &i__3, &c_b1294, &a[j + + a_dim1], lda, &c_b1015, &a[j + j * a_dim1], lda); + zpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { -L40: - if (l != lend) { - lendm1 = lend - 1; - i__1 = lendm1; - for (m = l; m <= i__1; ++m) { -/* Computing 2nd power */ - d__2 = (d__1 = e[m], abs(d__1)); - tst = d__2 * d__2; - if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m - + 1], abs(d__2)) + safmin) { - goto L60; +/* Compute the current block column. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + zgemm_("No transpose", "Conjugate transpose", &i__3, &jb, + &i__4, &z__1, &a[j + jb + a_dim1], lda, &a[j + + a_dim1], lda, &c_b60, &a[j + jb + j * a_dim1], + lda); + i__3 = *n - j - jb + 1; + ztrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" + , &i__3, &jb, &c_b60, &a[j + j * a_dim1], lda, &a[ + j + jb + j * a_dim1], lda); } -/* L50: */ +/* L20: */ } } + } + goto L40; - m = lend; +L30: + *info = *info + j - 1; -L60: - if (m < lend) { - e[m] = 0.; - } - p = d__[l]; - if (m == l) { - goto L80; - } +L40: + return 0; -/* - If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 - to compute its eigensystem. 
-*/ +/* End of ZPOTRF */ - if (m == l + 1) { - if (icompz > 0) { - dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); - work[l] = c__; - work[*n - 1 + l] = s; - zlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & - z__[l * z_dim1 + 1], ldz); - } else { - dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); - } - d__[l] = rt1; - d__[l + 1] = rt2; - e[l] = 0.; - l += 2; - if (l <= lend) { - goto L40; - } - goto L140; - } +} /* zpotrf_ */ - if (jtot == nmaxit) { - goto L140; - } - ++jtot; +/* Subroutine */ int zpotri_(char *uplo, integer *n, doublecomplex *a, + integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1; -/* Form shift. */ + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), zlauum_( + char *, integer *, doublecomplex *, integer *, integer *), + ztrtri_(char *, char *, integer *, doublecomplex *, integer *, + integer *); - g = (d__[l + 1] - p) / (e[l] * 2.); - r__ = dlapy2_(&g, &c_b1015); - g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); - s = 1.; - c__ = 1.; - p = 0.; +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 -/* Inner loop */ - mm1 = m - 1; - i__1 = l; - for (i__ = mm1; i__ >= i__1; --i__) { - f = s * e[i__]; - b = c__ * e[i__]; - dlartg_(&g, &f, &c__, &s, &r__); - if (i__ != m - 1) { - e[i__ + 1] = r__; - } - g = d__[i__ + 1] - p; - r__ = (d__[i__] - g) * s + c__ * 2. * b; - p = s * r__; - d__[i__ + 1] = g + p; - g = c__ * r__ - b; + Purpose + ======= -/* If eigenvectors are desired, then save rotations. */ + ZPOTRI computes the inverse of a complex Hermitian positive definite + matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + computed by ZPOTRF. 
- if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = -s; - } + Arguments + ========= -/* L70: */ - } + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. -/* If eigenvectors are desired, then apply saved rotations. */ + N (input) INTEGER + The order of the matrix A. N >= 0. - if (icompz > 0) { - mm = m - l + 1; - zlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the triangular factor U or L from the Cholesky + factorization A = U**H*U or A = L*L**H, as computed by + ZPOTRF. + On exit, the upper or lower triangle of the (Hermitian) + inverse of A, overwriting the input factor U or L. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the (i,i) element of the factor U or L is + zero, and the inverse could not be computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZPOTRI", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Invert the triangular Cholesky factor U or L. */ + + ztrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info); + if (*info > 0) { + return 0; + } + +/* Form inv(U)*inv(U)' or inv(L)'*inv(L). 
*/ + + zlauum_(uplo, n, &a[a_offset], lda, info); + + return 0; + +/* End of ZPOTRI */ + +} /* zpotri_ */ + +/* Subroutine */ int zpotrs_(char *uplo, integer *n, integer *nrhs, + doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZPOTRS solves a system of linear equations A*X = B with a Hermitian + positive definite matrix A using the Cholesky factorization + A = U**H*U or A = L*L**H computed by ZPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**H*U or A = L*L**H, as computed by ZPOTRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + B (input/output) COMPLEX*16 array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* + Solve A*X = B where A = U'*U. + + Solve U'*X = B, overwriting B with X. +*/ + + ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, & + c_b60, &a[a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b60, & + a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. +*/ + + ztrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b60, & + a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. 
*/ + + ztrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, & + c_b60, &a[a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of ZPOTRS */ + +} /* zpotrs_ */ + +/* Subroutine */ int zstedc_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work, + integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork, + integer *liwork, integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2; + + /* Builtin functions */ + double log(doublereal); + integer pow_ii(integer *, integer *); + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j, k, m; + static doublereal p; + static integer ii, ll, end, lgn; + static doublereal eps, tiny; + extern logical lsame_(char *, char *); + static integer lwmin, start; + extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), zlaed0_(integer *, integer *, + doublereal *, doublereal *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, integer *, integer *); + + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dstedc_(char *, integer *, + doublereal *, doublereal *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, integer *), dlaset_( + char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, + integer *), zlacrm_(integer *, integer *, doublecomplex *, + integer *, doublereal *, integer *, doublecomplex *, integer *, + doublereal *); + static 
integer liwmin, icompz; + extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *); + static doublereal orgnrm; + static integer lrwmin; + static logical lquery; + static integer smlsiz; + extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *, + doublereal *, doublecomplex *, integer *, doublereal *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZSTEDC computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the divide and conquer method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this + matrix to tridiagonal form. + + This code makes very mild assumptions about floating point + arithmetic. It will work on machines with a guard digit in + add/subtract, or on those binary machines without guard digits + which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. + It could conceivably fail on hexadecimal or decimal machines + without guard digits, but we know of none. See DLAED3 for details. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'I': Compute eigenvectors of tridiagonal matrix also. + = 'V': Compute eigenvectors of original Hermitian matrix + also. On entry, Z contains the unitary matrix used + to reduce the original matrix to tridiagonal form. + + N (input) INTEGER + The dimension of the symmetric tridiagonal matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. 
+ + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the subdiagonal elements of the tridiagonal matrix. + On exit, E has been destroyed. + + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1. + If eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. + If COMPZ = 'V' and N > 1, LWORK must be at least N*N. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + RWORK (workspace/output) DOUBLE PRECISION array, + dimension (LRWORK) + On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. + + LRWORK (input) INTEGER + The dimension of the array RWORK. + If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. + If COMPZ = 'V' and N > 1, LRWORK must be at least + 1 + 3*N + 2*N*lg N + 3*N**2 , + where lg( N ) = smallest integer k such + that 2**k >= N. + If COMPZ = 'I' and N > 1, LRWORK must be at least + 1 + 4*N + 2*N**2 . + + If LRWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the RWORK array, + returns this value as the first entry of the RWORK array, and + no error message related to LRWORK is issued by XERBLA. 
+ + IWORK (workspace/output) INTEGER array, dimension (LIWORK) + On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. + + LIWORK (input) INTEGER + The dimension of the array IWORK. + If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. + If COMPZ = 'V' or N > 1, LIWORK must be at least + 6 + 6*N + 5*N*lg N. + If COMPZ = 'I' or N > 1, LIWORK must be at least + 3 + 5*N . + + If LIWORK = -1, then a workspace query is assumed; the + routine only calculates the optimal size of the IWORK array, + returns this value as the first entry of the IWORK array, and + no error message related to LIWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit. + < 0: if INFO = -i, the i-th argument had an illegal value. + > 0: The algorithm failed to compute an eigenvalue while + working on the submatrix lying in rows and columns + INFO/(N+1) through mod(INFO,N+1). + + Further Details + =============== + + Based on contributions by + Jeff Rutter, Computer Science Division, University of California + at Berkeley, USA + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + --rwork; + --iwork; + + /* Function Body */ + *info = 0; + lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (*n <= 1 || icompz <= 0) { + lwmin = 1; + liwmin = 1; + lrwmin = 1; + } else { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { + lwmin = *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { + lwmin = 1; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = (*n << 2) + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } else if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! 
lquery) { + *info = -12; + } + + if (*info == 0) { + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZSTEDC", &i__1); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + if (*n == 1) { + if (icompz != 0) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + + smlsiz = ilaenv_(&c__9, "ZSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + +/* + If the following conditional clause is removed, then the routine + will use the Divide and Conquer routine to compute only the + eigenvalues, which requires (3N + 3N**2) real workspace and + (2 + 5N + 2N lg(N)) integer workspace. + Since on many architectures DSTERF is much faster than any other + algorithm for finding eigenvalues only, it is used here + as the default. + + If COMPZ = 'N', use DSTERF to compute the eigenvalues. +*/ + + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + return 0; + } + +/* + If N is smaller than the minimum divide size (SMLSIZ+1), then + solve the problem with another solver. +*/ + + if (*n <= smlsiz) { + if (icompz == 0) { + dsterf_(n, &d__[1], &e[1], info); + return 0; + } else if (icompz == 2) { + zsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); + return 0; + } else { + zsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); + return 0; + } + } + +/* If COMPZ = 'I', we simply call DSTEDC instead. 
*/ + + if (icompz == 2) { + dlaset_("Full", n, n, &c_b324, &c_b1015, &rwork[1], n); + ll = *n * *n + 1; + i__1 = *lrwork - ll + 1; + dstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & + iwork[1], liwork, info); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * z_dim1; + i__4 = (j - 1) * *n + i__; + z__[i__3].r = rwork[i__4], z__[i__3].i = 0.; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* + From now on, only option left to be handled is COMPZ = 'V', + i.e. ICOMPZ = 1. + + Scale. +*/ + + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + return 0; + } + + eps = EPSILON; + + start = 1; + +/* while ( START <= N ) */ + +L30: + if (start <= *n) { + +/* + Let END be the position of the next subdiagonal entry such that + E( END ) <= TINY or END = N if no such subdiagonal exists. The + matrix identified by the elements between START and END + constitutes an independent sub-problem. +*/ + + end = start; +L40: + if (end < *n) { + tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = + d__[end + 1], abs(d__2))); + if ((d__1 = e[end], abs(d__1)) > tiny) { + ++end; + goto L40; + } + } + +/* (Sub) Problem determined. Compute its size and solve it. */ + + m = end - start + 1; + if (m > smlsiz) { + *info = smlsiz; + +/* Scale. */ + + orgnrm = dlanst_("M", &m, &d__[start], &e[start]); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &i__1, &c__1, &e[ + start], &i__2, info); + + zlaed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1], + ldz, &work[1], n, &rwork[1], &iwork[1], info); + if (*info > 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m + + 1) + start - 1; + return 0; + } + +/* Scale back. 
*/ + + dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, &m, &c__1, &d__[ + start], &m, info); + + } else { + dsteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m * + m + 1], info); + zlacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & + work[1], n, &rwork[m * m + 1]); + zlacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz); + if (*info > 0) { + *info = start * (*n + 1) + end; + return 0; + } + } + + start = end + 1; + goto L30; + } + +/* + endwhile + + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. +*/ + + if (m != *n) { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L50: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L60: */ + } + } + + work[1].r = (doublereal) lwmin, work[1].i = 0.; + rwork[1] = (doublereal) lrwmin; + iwork[1] = liwmin; + + return 0; + +/* End of ZSTEDC */ + +} /* zstedc_ */ + +/* Subroutine */ int zsteqr_(char *compz, integer *n, doublereal *d__, + doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work, + integer *info) +{ + /* System generated locals */ + integer z_dim1, z_offset, i__1, i__2; + doublereal d__1, d__2; + + /* Builtin functions */ + double sqrt(doublereal), d_sign(doublereal *, doublereal *); + + /* Local variables */ + static doublereal b, c__, f, g; + static integer i__, j, k, l, m; + static doublereal p, r__, s; + static integer l1, ii, mm, lm1, mm1, nm1; + static doublereal rt1, rt2, eps; + static integer lsv; + static doublereal tst, eps2; + static integer lend, jtot; + extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal + *, 
doublereal *, doublereal *); + extern logical lsame_(char *, char *); + static doublereal anorm; + extern /* Subroutine */ int zlasr_(char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, doublecomplex *, integer *), zswap_(integer *, doublecomplex *, + integer *, doublecomplex *, integer *), dlaev2_(doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *); + static integer lendm1, lendp1; + + static integer iscale; + extern /* Subroutine */ int dlascl_(char *, integer *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *); + static doublereal safmin; + extern /* Subroutine */ int dlartg_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + static doublereal safmax; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); + extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *, + integer *); + static integer lendsv; + static doublereal ssfmin; + static integer nmaxit, icompz; + static doublereal ssfmax; + extern /* Subroutine */ int zlaset_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, doublecomplex *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 + + + Purpose + ======= + + ZSTEQR computes all eigenvalues and, optionally, eigenvectors of a + symmetric tridiagonal matrix using the implicit QL or QR method. + The eigenvectors of a full or band complex Hermitian matrix can also + be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this + matrix to tridiagonal form. + + Arguments + ========= + + COMPZ (input) CHARACTER*1 + = 'N': Compute eigenvalues only. + = 'V': Compute eigenvalues and eigenvectors of the original + Hermitian matrix. 
On entry, Z must contain the + unitary matrix used to reduce the original matrix + to tridiagonal form. + = 'I': Compute eigenvalues and eigenvectors of the + tridiagonal matrix. Z is initialized to the identity + matrix. + + N (input) INTEGER + The order of the matrix. N >= 0. + + D (input/output) DOUBLE PRECISION array, dimension (N) + On entry, the diagonal elements of the tridiagonal matrix. + On exit, if INFO = 0, the eigenvalues in ascending order. + + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the (n-1) subdiagonal elements of the tridiagonal + matrix. + On exit, E has been destroyed. + + Z (input/output) COMPLEX*16 array, dimension (LDZ, N) + On entry, if COMPZ = 'V', then Z contains the unitary + matrix used in the reduction to tridiagonal form. + On exit, if INFO = 0, then if COMPZ = 'V', Z contains the + orthonormal eigenvectors of the original Hermitian matrix, + and if COMPZ = 'I', Z contains the orthonormal eigenvectors + of the symmetric tridiagonal matrix. + If COMPZ = 'N', then Z is not referenced. + + LDZ (input) INTEGER + The leading dimension of the array Z. LDZ >= 1, and if + eigenvectors are desired, then LDZ >= max(1,N). + + WORK (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2)) + If COMPZ = 'N', then WORK is not referenced. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: the algorithm has failed to find all the eigenvalues in + a total of 30*N iterations; if INFO = i, then i + elements of E have not converged to zero; on exit, D + and E contain the elements of a symmetric tridiagonal + matrix which is unitarily similar to the original + matrix. + + ===================================================================== + + + Test the input parameters. 
+*/ + + /* Parameter adjustments */ + --d__; + --e; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + + if (lsame_(compz, "N")) { + icompz = 0; + } else if (lsame_(compz, "V")) { + icompz = 1; + } else if (lsame_(compz, "I")) { + icompz = 2; + } else { + icompz = -1; + } + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZSTEQR", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (*n == 1) { + if (icompz == 2) { + i__1 = z_dim1 + 1; + z__[i__1].r = 1., z__[i__1].i = 0.; + } + return 0; + } + +/* Determine the unit roundoff and over/underflow thresholds. */ + + eps = EPSILON; +/* Computing 2nd power */ + d__1 = eps; + eps2 = d__1 * d__1; + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + ssfmax = sqrt(safmax) / 3.; + ssfmin = sqrt(safmin) / eps2; + +/* + Compute the eigenvalues and eigenvectors of the tridiagonal + matrix. +*/ + + if (icompz == 2) { + zlaset_("Full", n, n, &c_b59, &c_b60, &z__[z_offset], ldz); + } + + nmaxit = *n * 30; + jtot = 0; + +/* + Determine where the matrix splits and choose QL or QR iteration + for each block, according to whether top or bottom diagonal + element is smaller. +*/ + + l1 = 1; + nm1 = *n - 1; + +L10: + if (l1 > *n) { + goto L160; + } + if (l1 > 1) { + e[l1 - 1] = 0.; + } + if (l1 <= nm1) { + i__1 = nm1; + for (m = l1; m <= i__1; ++m) { + tst = (d__1 = e[m], abs(d__1)); + if (tst == 0.) 
{ + goto L30; + } + if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m + + 1], abs(d__2))) * eps) { + e[m] = 0.; + goto L30; + } +/* L20: */ + } + } + m = *n; + +L30: + l = l1; + lsv = l; + lend = m; + lendsv = lend; + l1 = m + 1; + if (lend == l) { + goto L10; + } + +/* Scale submatrix in rows and columns L to LEND */ + + i__1 = lend - l + 1; + anorm = dlanst_("I", &i__1, &d__[l], &e[l]); + iscale = 0; + if (anorm == 0.) { + goto L10; + } + if (anorm > ssfmax) { + iscale = 1; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n, + info); + } else if (anorm < ssfmin) { + iscale = 2; + i__1 = lend - l + 1; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n, + info); + i__1 = lend - l; + dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n, + info); + } + +/* Choose between QL and QR iteration */ + + if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) { + lend = lsv; + l = lendsv; + } + + if (lend > l) { + +/* + QL Iteration + + Look for small subdiagonal element. +*/ + +L40: + if (l != lend) { + lendm1 = lend - 1; + i__1 = lendm1; + for (m = l; m <= i__1; ++m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + + 1], abs(d__2)) + safmin) { + goto L60; + } +/* L50: */ + } + } + + m = lend; + +L60: + if (m < lend) { + e[m] = 0.; + } + p = d__[l]; + if (m == l) { + goto L80; + } + +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. 
+*/ + + if (m == l + 1) { + if (icompz > 0) { + dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s); + work[l] = c__; + work[*n - 1 + l] = s; + zlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], & + z__[l * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2); + } + d__[l] = rt1; + d__[l + 1] = rt2; + e[l] = 0.; + l += 2; + if (l <= lend) { + goto L40; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l + 1] - p) / (e[l] * 2.); + r__ = dlapy2_(&g, &c_b1015); + g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + mm1 = m - 1; + i__1 = l; + for (i__ = mm1; i__ >= i__1; --i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m - 1) { + e[i__ + 1] = r__; + } + g = d__[i__ + 1] - p; + r__ = (d__[i__] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__ + 1] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = -s; + } + +/* L70: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = m - l + 1; + zlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l + * z_dim1 + 1], ldz); + } + + d__[l] -= p; + e[l] = g; + goto L40; + +/* Eigenvalue found. */ + +L80: + d__[l] = p; + + ++l; + if (l <= lend) { + goto L40; + } + goto L140; + + } else { + +/* + QR Iteration + + Look for small superdiagonal element. 
+*/ + +L90: + if (l != lend) { + lendp1 = lend + 1; + i__1 = lendp1; + for (m = l; m >= i__1; --m) { +/* Computing 2nd power */ + d__2 = (d__1 = e[m - 1], abs(d__1)); + tst = d__2 * d__2; + if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m + - 1], abs(d__2)) + safmin) { + goto L110; + } +/* L100: */ + } + } + + m = lend; + +L110: + if (m > lend) { + e[m - 1] = 0.; + } + p = d__[l]; + if (m == l) { + goto L130; + } + +/* + If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 + to compute its eigensystem. +*/ + + if (m == l - 1) { + if (icompz > 0) { + dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) + ; + work[m] = c__; + work[*n - 1 + m] = s; + zlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & + z__[(l - 1) * z_dim1 + 1], ldz); + } else { + dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); + } + d__[l - 1] = rt1; + d__[l] = rt2; + e[l - 1] = 0.; + l += -2; + if (l >= lend) { + goto L90; + } + goto L140; + } + + if (jtot == nmaxit) { + goto L140; + } + ++jtot; + +/* Form shift. */ + + g = (d__[l - 1] - p) / (e[l - 1] * 2.); + r__ = dlapy2_(&g, &c_b1015); + g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); + + s = 1.; + c__ = 1.; + p = 0.; + +/* Inner loop */ + + lm1 = l - 1; + i__1 = lm1; + for (i__ = m; i__ <= i__1; ++i__) { + f = s * e[i__]; + b = c__ * e[i__]; + dlartg_(&g, &f, &c__, &s, &r__); + if (i__ != m) { + e[i__ - 1] = r__; + } + g = d__[i__] - p; + r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; + p = s * r__; + d__[i__] = g + p; + g = c__ * r__ - b; + +/* If eigenvectors are desired, then save rotations. */ + + if (icompz > 0) { + work[i__] = c__; + work[*n - 1 + i__] = s; + } + +/* L120: */ + } + +/* If eigenvectors are desired, then apply saved rotations. */ + + if (icompz > 0) { + mm = l - m + 1; + zlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m * z_dim1 + 1], ldz); } d__[l] -= p; - e[l] = g; - goto L40; + e[lm1] = g; + goto L90; /* Eigenvalue found. 
*/ -L80: +L130: d__[l] = p; - ++l; - if (l <= lend) { - goto L40; - } - goto L140; + --l; + if (l >= lend) { + goto L90; + } + goto L140; + + } + +/* Undo scaling if necessary */ + +L140: + if (iscale == 1) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } else if (iscale == 2) { + i__1 = lendsv - lsv + 1; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], + n, info); + i__1 = lendsv - lsv; + dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, + info); + } + +/* + Check for no convergence to an eigenvalue after a total + of N*MAXIT iterations. +*/ + + if (jtot == nmaxit) { + i__1 = *n - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + if (e[i__] != 0.) { + ++(*info); + } +/* L150: */ + } + return 0; + } + goto L10; + +/* Order eigenvalues and eigenvectors. */ + +L160: + if (icompz == 0) { + +/* Use Quick Sort */ + + dlasrt_("I", n, &d__[1], info); + + } else { + +/* Use Selection Sort to minimize swaps of eigenvectors */ + + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } +/* L170: */ + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], + &c__1); + } +/* L180: */ + } + } + return 0; + +/* End of ZSTEQR */ + +} /* zsteqr_ */ + +/* Subroutine */ int ztrevc_(char *side, char *howmny, logical *select, + integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, + integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer + *m, doublecomplex *work, doublereal *rwork, integer *info) +{ + /* System generated locals */ + integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, + i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3; + 
doublecomplex z__1, z__2; + + /* Builtin functions */ + double d_imag(doublecomplex *); + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j, k, ii, ki, is; + static doublereal ulp; + static logical allv; + static doublereal unfl, ovfl, smin; + static logical over; + static doublereal scale; + extern logical lsame_(char *, char *); + static doublereal remax; + static logical leftv, bothv; + extern /* Subroutine */ int zgemv_(char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *, doublecomplex *, doublecomplex *, integer *); + static logical somev; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); + + extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( + integer *, doublereal *, doublecomplex *, integer *); + extern integer izamax_(integer *, doublecomplex *, integer *); + static logical rightv; + extern doublereal dzasum_(integer *, doublecomplex *, integer *); + static doublereal smlnum; + extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublereal *, doublereal *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ZTREVC computes some or all of the right and/or left eigenvectors of + a complex upper triangular matrix T. + + The right eigenvector x and the left eigenvector y of T corresponding + to an eigenvalue w are defined by: + + T*x = w*x, y'*T = w*y' + + where y' denotes the conjugate transpose of the vector y. 
+ + If all eigenvectors are requested, the routine may either return the + matrices X and/or Y of right or left eigenvectors of T, or the + products Q*X and/or Q*Y, where Q is an input unitary + matrix. If T was obtained from the Schur factorization of an + original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of + right or left eigenvectors of A. + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'R': compute right eigenvectors only; + = 'L': compute left eigenvectors only; + = 'B': compute both right and left eigenvectors. + + HOWMNY (input) CHARACTER*1 + = 'A': compute all right and/or left eigenvectors; + = 'B': compute all right and/or left eigenvectors, + and backtransform them using the input matrices + supplied in VR and/or VL; + = 'S': compute selected right and/or left eigenvectors, + specified by the logical array SELECT. + + SELECT (input) LOGICAL array, dimension (N) + If HOWMNY = 'S', SELECT specifies the eigenvectors to be + computed. + If HOWMNY = 'A' or 'B', SELECT is not referenced. + To select the eigenvector corresponding to the j-th + eigenvalue, SELECT(j) must be set to .TRUE.. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) COMPLEX*16 array, dimension (LDT,N) + The upper triangular matrix T. T is modified, but restored + on exit. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + VL (input/output) COMPLEX*16 array, dimension (LDVL,MM) + On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by ZHSEQR). + On exit, if SIDE = 'L' or 'B', VL contains: + if HOWMNY = 'A', the matrix Y of left eigenvectors of T; + VL is lower triangular. The i-th column + VL(i) of VL is the eigenvector corresponding + to T(i,i). 
+ if HOWMNY = 'B', the matrix Q*Y; + if HOWMNY = 'S', the left eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VL, in the same order as their + eigenvalues. + If SIDE = 'R', VL is not referenced. + + LDVL (input) INTEGER + The leading dimension of the array VL. LDVL >= max(1,N) if + SIDE = 'L' or 'B'; LDVL >= 1 otherwise. + + VR (input/output) COMPLEX*16 array, dimension (LDVR,MM) + On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must + contain an N-by-N matrix Q (usually the unitary matrix Q of + Schur vectors returned by ZHSEQR). + On exit, if SIDE = 'R' or 'B', VR contains: + if HOWMNY = 'A', the matrix X of right eigenvectors of T; + VR is upper triangular. The i-th column + VR(i) of VR is the eigenvector corresponding + to T(i,i). + if HOWMNY = 'B', the matrix Q*X; + if HOWMNY = 'S', the right eigenvectors of T specified by + SELECT, stored consecutively in the columns + of VR, in the same order as their + eigenvalues. + If SIDE = 'L', VR is not referenced. + + LDVR (input) INTEGER + The leading dimension of the array VR. LDVR >= max(1,N) if + SIDE = 'R' or 'B'; LDVR >= 1 otherwise. + + MM (input) INTEGER + The number of columns in the arrays VL and/or VR. MM >= M. + + M (output) INTEGER + The number of columns in the arrays VL and/or VR actually + used to store the eigenvectors. If HOWMNY = 'A' or 'B', M + is set to N. Each selected eigenvector occupies one + column. + + WORK (workspace) COMPLEX*16 array, dimension (2*N) + + RWORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + Further Details + =============== + + The algorithm used in this program is basically backward (forward) + substitution, with scaling to make the the code robust against + possible overflow. 
+ + Each eigenvector is normalized so that the element of largest + magnitude has magnitude 1; here the magnitude of a complex number + (x,y) is taken to be |x| + |y|. - } else { + ===================================================================== -/* - QR Iteration - Look for small superdiagonal element. + Decode and test the input parameters */ -L90: - if (l != lend) { - lendp1 = lend + 1; - i__1 = lendp1; - for (m = l; m >= i__1; --m) { -/* Computing 2nd power */ - d__2 = (d__1 = e[m - 1], abs(d__1)); - tst = d__2 * d__2; - if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m - - 1], abs(d__2)) + safmin) { - goto L110; - } -/* L100: */ - } - } + /* Parameter adjustments */ + --select; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + vl_dim1 = *ldvl; + vl_offset = 1 + vl_dim1; + vl -= vl_offset; + vr_dim1 = *ldvr; + vr_offset = 1 + vr_dim1; + vr -= vr_offset; + --work; + --rwork; - m = lend; + /* Function Body */ + bothv = lsame_(side, "B"); + rightv = lsame_(side, "R") || bothv; + leftv = lsame_(side, "L") || bothv; -L110: - if (m > lend) { - e[m - 1] = 0.; - } - p = d__[l]; - if (m == l) { - goto L130; - } + allv = lsame_(howmny, "A"); + over = lsame_(howmny, "B"); + somev = lsame_(howmny, "S"); /* - If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 - to compute its eigensystem. + Set M to the number of columns required to store the selected + eigenvectors. 
*/ - if (m == l - 1) { - if (icompz > 0) { - dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s) - ; - work[m] = c__; - work[*n - 1 + m] = s; - zlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], & - z__[(l - 1) * z_dim1 + 1], ldz); - } else { - dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2); - } - d__[l - 1] = rt1; - d__[l] = rt2; - e[l - 1] = 0.; - l += -2; - if (l >= lend) { - goto L90; + if (somev) { + *m = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (select[j]) { + ++(*m); } - goto L140; - } - - if (jtot == nmaxit) { - goto L140; +/* L10: */ } - ++jtot; - -/* Form shift. */ + } else { + *m = *n; + } - g = (d__[l - 1] - p) / (e[l - 1] * 2.); - r__ = dlapy2_(&g, &c_b1015); - g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); + *info = 0; + if (! rightv && ! leftv) { + *info = -1; + } else if (! allv && ! over && ! somev) { + *info = -2; + } else if (*n < 0) { + *info = -4; + } else if (*ldt < max(1,*n)) { + *info = -6; + } else if (*ldvl < 1 || leftv && *ldvl < *n) { + *info = -8; + } else if (*ldvr < 1 || rightv && *ldvr < *n) { + *info = -10; + } else if (*mm < *m) { + *info = -11; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZTREVC", &i__1); + return 0; + } - s = 1.; - c__ = 1.; - p = 0.; +/* Quick return if possible. */ -/* Inner loop */ + if (*n == 0) { + return 0; + } - lm1 = l - 1; - i__1 = lm1; - for (i__ = m; i__ <= i__1; ++i__) { - f = s * e[i__]; - b = c__ * e[i__]; - dlartg_(&g, &f, &c__, &s, &r__); - if (i__ != m) { - e[i__ - 1] = r__; - } - g = d__[i__] - p; - r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b; - p = s * r__; - d__[i__] = g + p; - g = c__ * r__ - b; +/* Set the constants to control overflow. */ -/* If eigenvectors are desired, then save rotations. */ + unfl = SAFEMINIMUM; + ovfl = 1. / unfl; + dlabad_(&unfl, &ovfl); + ulp = PRECISION; + smlnum = unfl * (*n / ulp); - if (icompz > 0) { - work[i__] = c__; - work[*n - 1 + i__] = s; - } +/* Store the diagonal elements of T in working array WORK. 
*/ -/* L120: */ - } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + *n; + i__3 = i__ + i__ * t_dim1; + work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i; +/* L20: */ + } -/* If eigenvectors are desired, then apply saved rotations. */ +/* + Compute 1-norm of each column of strictly upper triangular + part of T to control overflow in triangular solver. +*/ - if (icompz > 0) { - mm = l - m + 1; - zlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m - * z_dim1 + 1], ldz); - } + rwork[1] = 0.; + i__1 = *n; + for (j = 2; j <= i__1; ++j) { + i__2 = j - 1; + rwork[j] = dzasum_(&i__2, &t[j * t_dim1 + 1], &c__1); +/* L30: */ + } - d__[l] -= p; - e[lm1] = g; - goto L90; + if (rightv) { -/* Eigenvalue found. */ +/* Compute right eigenvectors. */ -L130: - d__[l] = p; + is = *m; + for (ki = *n; ki >= 1; --ki) { - --l; - if (l >= lend) { - goto L90; - } - goto L140; + if (somev) { + if (! select[ki]) { + goto L80; + } + } +/* Computing MAX */ + i__1 = ki + ki * t_dim1; + d__3 = ulp * ((d__1 = t[i__1].r, abs(d__1)) + (d__2 = d_imag(&t[ + ki + ki * t_dim1]), abs(d__2))); + smin = max(d__3,smlnum); - } + work[1].r = 1., work[1].i = 0.; -/* Undo scaling if necessary */ +/* Form right-hand side. */ -L140: - if (iscale == 1) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } else if (iscale == 2) { - i__1 = lendsv - lsv + 1; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv], - n, info); - i__1 = lendsv - lsv; - dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n, - info); - } + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k; + i__3 = k + ki * t_dim1; + z__1.r = -t[i__3].r, z__1.i = -t[i__3].i; + work[i__2].r = z__1.r, work[i__2].i = z__1.i; +/* L40: */ + } /* - Check for no convergence to an eigenvalue after a total - of N*MAXIT iterations. 
+ Solve the triangular system: + (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. */ - if (jtot == nmaxit) { - i__1 = *n - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - if (e[i__] != 0.) { - ++(*info); + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + k * t_dim1; + i__4 = ki + ki * t_dim1; + z__1.r = t[i__3].r - t[i__4].r, z__1.i = t[i__3].i - t[i__4] + .i; + t[i__2].r = z__1.r, t[i__2].i = z__1.i; + i__2 = k + k * t_dim1; + if ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * + t_dim1]), abs(d__2)) < smin) { + i__3 = k + k * t_dim1; + t[i__3].r = smin, t[i__3].i = 0.; + } +/* L50: */ + } + + if (ki > 1) { + i__1 = ki - 1; + zlatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[ + t_offset], ldt, &work[1], &scale, &rwork[1], info); + i__1 = ki; + work[i__1].r = scale, work[i__1].i = 0.; } -/* L150: */ - } - return 0; - } - goto L10; -/* Order eigenvalues and eigenvectors. */ +/* Copy the vector x or Q*x to VR and normalize. */ -L160: - if (icompz == 0) { + if (! over) { + zcopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1); -/* Use Quick Sort */ + ii = izamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); + i__1 = ii + is * vr_dim1; + remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( + &vr[ii + is * vr_dim1]), abs(d__2))); + zdscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); - dlasrt_("I", n, &d__[1], info); + i__1 = *n; + for (k = ki + 1; k <= i__1; ++k) { + i__2 = k + is * vr_dim1; + vr[i__2].r = 0., vr[i__2].i = 0.; +/* L60: */ + } + } else { + if (ki > 1) { + i__1 = ki - 1; + z__1.r = scale, z__1.i = 0.; + zgemv_("N", n, &i__1, &c_b60, &vr[vr_offset], ldvr, &work[ + 1], &c__1, &z__1, &vr[ki * vr_dim1 + 1], &c__1); + } - } else { + ii = izamax_(n, &vr[ki * vr_dim1 + 1], &c__1); + i__1 = ii + ki * vr_dim1; + remax = 1. 
/ ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( + &vr[ii + ki * vr_dim1]), abs(d__2))); + zdscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); + } -/* Use Selection Sort to minimize swaps of eigenvectors */ +/* Set back the original diagonal elements of T. */ - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } -/* L170: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); + i__1 = ki - 1; + for (k = 1; k <= i__1; ++k) { + i__2 = k + k * t_dim1; + i__3 = k + *n; + t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i; +/* L70: */ } -/* L180: */ + + --is; +L80: + ; } } - return 0; -/* End of ZSTEQR */ + if (leftv) { -} /* zsteqr_ */ +/* Compute left eigenvectors. */ -/* Subroutine */ int ztrevc_(char *side, char *howmny, logical *select, - integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl, - integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer - *m, doublecomplex *work, doublereal *rwork, integer *info) -{ - /* System generated locals */ - integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4, i__5; - doublereal d__1, d__2, d__3; - doublecomplex z__1, z__2; + is = 1; + i__1 = *n; + for (ki = 1; ki <= i__1; ++ki) { - /* Builtin functions */ - double d_imag(doublecomplex *); - void d_cnjg(doublecomplex *, doublecomplex *); + if (somev) { + if (! 
select[ki]) { + goto L130; + } + } +/* Computing MAX */ + i__2 = ki + ki * t_dim1; + d__3 = ulp * ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[ + ki + ki * t_dim1]), abs(d__2))); + smin = max(d__3,smlnum); - /* Local variables */ - static integer i__, j, k, ii, ki, is; - static doublereal ulp; - static logical allv; - static doublereal unfl, ovfl, smin; - static logical over; - static doublereal scale; - extern logical lsame_(char *, char *); - static doublereal remax; - static logical leftv, bothv; - extern /* Subroutine */ int zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *); - static logical somev; - extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); + i__2 = *n; + work[i__2].r = 1., work[i__2].i = 0.; - extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( - integer *, doublereal *, doublecomplex *, integer *); - extern integer izamax_(integer *, doublecomplex *, integer *); - static logical rightv; - extern doublereal dzasum_(integer *, doublecomplex *, integer *); - static doublereal smlnum; - extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *, - integer *, doublecomplex *, integer *, doublecomplex *, - doublereal *, doublereal *, integer *); +/* Form right-hand side. */ + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k; + d_cnjg(&z__2, &t[ki + k * t_dim1]); + z__1.r = -z__2.r, z__1.i = -z__2.i; + work[i__3].r = z__1.r, work[i__3].i = z__1.i; +/* L90: */ + } /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - + Solve the triangular system: + (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. 
+*/ - Purpose - ======= + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + k * t_dim1; + i__5 = ki + ki * t_dim1; + z__1.r = t[i__4].r - t[i__5].r, z__1.i = t[i__4].i - t[i__5] + .i; + t[i__3].r = z__1.r, t[i__3].i = z__1.i; + i__3 = k + k * t_dim1; + if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * + t_dim1]), abs(d__2)) < smin) { + i__4 = k + k * t_dim1; + t[i__4].r = smin, t[i__4].i = 0.; + } +/* L100: */ + } - ZTREVC computes some or all of the right and/or left eigenvectors of - a complex upper triangular matrix T. + if (ki < *n) { + i__2 = *n - ki; + zlatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", & + i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki + + 1], &scale, &rwork[1], info); + i__2 = ki; + work[i__2].r = scale, work[i__2].i = 0.; + } - The right eigenvector x and the left eigenvector y of T corresponding - to an eigenvalue w are defined by: +/* Copy the vector x or Q*x to VL and normalize. */ - T*x = w*x, y'*T = w*y' + if (! over) { + i__2 = *n - ki + 1; + zcopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1) + ; - where y' denotes the conjugate transpose of the vector y. + i__2 = *n - ki + 1; + ii = izamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; + i__2 = ii + is * vl_dim1; + remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( + &vl[ii + is * vl_dim1]), abs(d__2))); + i__2 = *n - ki + 1; + zdscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); - If all eigenvectors are requested, the routine may either return the - matrices X and/or Y of right or left eigenvectors of T, or the - products Q*X and/or Q*Y, where Q is an input unitary - matrix. If T was obtained from the Schur factorization of an - original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of - right or left eigenvectors of A. 
+ i__2 = ki - 1; + for (k = 1; k <= i__2; ++k) { + i__3 = k + is * vl_dim1; + vl[i__3].r = 0., vl[i__3].i = 0.; +/* L110: */ + } + } else { + if (ki < *n) { + i__2 = *n - ki; + z__1.r = scale, z__1.i = 0.; + zgemv_("N", n, &i__2, &c_b60, &vl[(ki + 1) * vl_dim1 + 1], + ldvl, &work[ki + 1], &c__1, &z__1, &vl[ki * + vl_dim1 + 1], &c__1); + } - Arguments - ========= + ii = izamax_(n, &vl[ki * vl_dim1 + 1], &c__1); + i__2 = ii + ki * vl_dim1; + remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( + &vl[ii + ki * vl_dim1]), abs(d__2))); + zdscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); + } - SIDE (input) CHARACTER*1 - = 'R': compute right eigenvectors only; - = 'L': compute left eigenvectors only; - = 'B': compute both right and left eigenvectors. +/* Set back the original diagonal elements of T. */ - HOWMNY (input) CHARACTER*1 - = 'A': compute all right and/or left eigenvectors; - = 'B': compute all right and/or left eigenvectors, - and backtransform them using the input matrices - supplied in VR and/or VL; - = 'S': compute selected right and/or left eigenvectors, - specified by the logical array SELECT. + i__2 = *n; + for (k = ki + 1; k <= i__2; ++k) { + i__3 = k + k * t_dim1; + i__4 = k + *n; + t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i; +/* L120: */ + } - SELECT (input) LOGICAL array, dimension (N) - If HOWMNY = 'S', SELECT specifies the eigenvectors to be - computed. - If HOWMNY = 'A' or 'B', SELECT is not referenced. - To select the eigenvector corresponding to the j-th - eigenvalue, SELECT(j) must be set to .TRUE.. + ++is; +L130: + ; + } + } - N (input) INTEGER - The order of the matrix T. N >= 0. + return 0; - T (input/output) COMPLEX*16 array, dimension (LDT,N) - The upper triangular matrix T. T is modified, but restored - on exit. +/* End of ZTREVC */ - LDT (input) INTEGER - The leading dimension of the array T. LDT >= max(1,N). 
+} /* ztrevc_ */ - VL (input/output) COMPLEX*16 array, dimension (LDVL,MM) - On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must - contain an N-by-N matrix Q (usually the unitary matrix Q of - Schur vectors returned by ZHSEQR). - On exit, if SIDE = 'L' or 'B', VL contains: - if HOWMNY = 'A', the matrix Y of left eigenvectors of T; - VL is lower triangular. The i-th column - VL(i) of VL is the eigenvector corresponding - to T(i,i). - if HOWMNY = 'B', the matrix Q*Y; - if HOWMNY = 'S', the left eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VL, in the same order as their - eigenvalues. - If SIDE = 'R', VL is not referenced. +/* Subroutine */ int ztrti2_(char *uplo, char *diag, integer *n, + doublecomplex *a, integer *lda, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublecomplex z__1; - LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= max(1,N) if - SIDE = 'L' or 'B'; LDVL >= 1 otherwise. + /* Builtin functions */ + void z_div(doublecomplex *, doublecomplex *, doublecomplex *); - VR (input/output) COMPLEX*16 array, dimension (LDVR,MM) - On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must - contain an N-by-N matrix Q (usually the unitary matrix Q of - Schur vectors returned by ZHSEQR). - On exit, if SIDE = 'R' or 'B', VR contains: - if HOWMNY = 'A', the matrix X of right eigenvectors of T; - VR is upper triangular. The i-th column - VR(i) of VR is the eigenvector corresponding - to T(i,i). - if HOWMNY = 'B', the matrix Q*X; - if HOWMNY = 'S', the right eigenvectors of T specified by - SELECT, stored consecutively in the columns - of VR, in the same order as their - eigenvalues. - If SIDE = 'L', VR is not referenced. 
+ /* Local variables */ + static integer j; + static doublecomplex ajj; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + static logical upper; + extern /* Subroutine */ int ztrmv_(char *, char *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *); + static logical nounit; - LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= max(1,N) if - SIDE = 'R' or 'B'; LDVR >= 1 otherwise. - MM (input) INTEGER - The number of columns in the arrays VL and/or VR. MM >= M. +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 - M (output) INTEGER - The number of columns in the arrays VL and/or VR actually - used to store the eigenvectors. If HOWMNY = 'A' or 'B', M - is set to N. Each selected eigenvector occupies one - column. - WORK (workspace) COMPLEX*16 array, dimension (2*N) + Purpose + ======= - RWORK (workspace) DOUBLE PRECISION array, dimension (N) + ZTRTI2 computes the inverse of a complex upper or lower triangular + matrix. - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value + This is the Level 2 BLAS version of the algorithm. - Further Details - =============== + Arguments + ========= - The algorithm used in this program is basically backward (forward) - substitution, with scaling to make the the code robust against - possible overflow. + UPLO (input) CHARACTER*1 + Specifies whether the matrix A is upper or lower triangular. + = 'U': Upper triangular + = 'L': Lower triangular - Each eigenvector is normalized so that the element of largest - magnitude has magnitude 1; here the magnitude of a complex number - (x,y) is taken to be |x| + |y|. + DIAG (input) CHARACTER*1 + Specifies whether or not the matrix A is unit triangular. 
+ = 'N': Non-unit triangular + = 'U': Unit triangular - ===================================================================== + N (input) INTEGER + The order of the matrix A. N >= 0. + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading n by n upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. - Decode and test the input parameters -*/ + On exit, the (triangular) inverse of the original matrix, in + the same storage format. - /* Parameter adjustments */ - --select; - t_dim1 = *ldt; - t_offset = 1 + t_dim1; - t -= t_offset; - vl_dim1 = *ldvl; - vl_offset = 1 + vl_dim1; - vl -= vl_offset; - vr_dim1 = *ldvr; - vr_offset = 1 + vr_dim1; - vr -= vr_offset; - --work; - --rwork; + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - /* Function Body */ - bothv = lsame_(side, "B"); - rightv = lsame_(side, "R") || bothv; - leftv = lsame_(side, "L") || bothv; + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value - allv = lsame_(howmny, "A"); - over = lsame_(howmny, "B"); - somev = lsame_(howmny, "S"); + ===================================================================== -/* - Set M to the number of columns required to store the selected - eigenvectors. + + Test the input parameters. */ - if (somev) { - *m = 0; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - if (select[j]) { - ++(*m); - } -/* L10: */ - } - } else { - *m = *n; - } + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + /* Function Body */ *info = 0; - if (! rightv && ! 
leftv) { + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! lsame_(uplo, "L")) { *info = -1; - } else if (! allv && ! over && ! somev) { + } else if (! nounit && ! lsame_(diag, "U")) { *info = -2; } else if (*n < 0) { - *info = -4; - } else if (*ldt < max(1,*n)) { - *info = -6; - } else if (*ldvl < 1 || leftv && *ldvl < *n) { - *info = -8; - } else if (*ldvr < 1 || rightv && *ldvr < *n) { - *info = -10; - } else if (*mm < *m) { - *info = -11; + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; } if (*info != 0) { i__1 = -(*info); - xerbla_("ZTREVC", &i__1); + xerbla_("ZTRTI2", &i__1); return 0; } -/* Quick return if possible. */ + if (upper) { - if (*n == 0) { - return 0; - } +/* Compute inverse of upper triangular matrix. */ -/* Set the constants to control overflow. */ + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (nounit) { + i__2 = j + j * a_dim1; + z_div(&z__1, &c_b60, &a[j + j * a_dim1]); + a[i__2].r = z__1.r, a[i__2].i = z__1.i; + i__2 = j + j * a_dim1; + z__1.r = -a[i__2].r, z__1.i = -a[i__2].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (*n / ulp); +/* Compute elements 1:j-1 of j-th column. */ -/* Store the diagonal elements of T in working array WORK. */ + i__2 = j - 1; + ztrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, & + a[j * a_dim1 + 1], &c__1); + i__2 = j - 1; + zscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1); +/* L10: */ + } + } else { - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__ + *n; - i__3 = i__ + i__ * t_dim1; - work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i; +/* Compute inverse of lower triangular matrix. 
*/ + + for (j = *n; j >= 1; --j) { + if (nounit) { + i__1 = j + j * a_dim1; + z_div(&z__1, &c_b60, &a[j + j * a_dim1]); + a[i__1].r = z__1.r, a[i__1].i = z__1.i; + i__1 = j + j * a_dim1; + z__1.r = -a[i__1].r, z__1.i = -a[i__1].i; + ajj.r = z__1.r, ajj.i = z__1.i; + } else { + z__1.r = -1., z__1.i = -0.; + ajj.r = z__1.r, ajj.i = z__1.i; + } + if (j < *n) { + +/* Compute elements j+1:n of j-th column. */ + + i__1 = *n - j; + ztrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j + + 1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1); + i__1 = *n - j; + zscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1); + } /* L20: */ + } } + return 0; + +/* End of ZTRTI2 */ + +} /* ztrti2_ */ + +/* Subroutine */ int ztrtri_(char *uplo, char *diag, integer *n, + doublecomplex *a, integer *lda, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5; + doublecomplex z__1; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer j, jb, nb, nn; + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + ztrsm_(char *, char *, char *, char *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, doublecomplex *, + integer *), ztrti2_(char *, char * + , integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static logical nounit; + + /* - Compute 1-norm of each column of strictly upper triangular - part of T to control overflow in triangular solver. -*/ + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + September 30, 1994 - rwork[1] = 0.; - i__1 = *n; - for (j = 2; j <= i__1; ++j) { - i__2 = j - 1; - rwork[j] = dzasum_(&i__2, &t[j * t_dim1 + 1], &c__1); -/* L30: */ - } - if (rightv) { + Purpose + ======= + + ZTRTRI computes the inverse of a complex upper or lower triangular + matrix A. -/* Compute right eigenvectors. */ + This is the Level 3 BLAS version of the algorithm. - is = *m; - for (ki = *n; ki >= 1; --ki) { + Arguments + ========= - if (somev) { - if (! select[ki]) { - goto L80; - } - } -/* Computing MAX */ - i__1 = ki + ki * t_dim1; - d__3 = ulp * ((d__1 = t[i__1].r, abs(d__1)) + (d__2 = d_imag(&t[ - ki + ki * t_dim1]), abs(d__2))); - smin = max(d__3,smlnum); + UPLO (input) CHARACTER*1 + = 'U': A is upper triangular; + = 'L': A is lower triangular. - work[1].r = 1., work[1].i = 0.; + DIAG (input) CHARACTER*1 + = 'N': A is non-unit triangular; + = 'U': A is unit triangular. -/* Form right-hand side. */ + N (input) INTEGER + The order of the matrix A. N >= 0. - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k; - i__3 = k + ki * t_dim1; - z__1.r = -t[i__3].r, z__1.i = -t[i__3].i; - work[i__2].r = z__1.r, work[i__2].i = z__1.i; -/* L40: */ - } + A (input/output) COMPLEX*16 array, dimension (LDA,N) + On entry, the triangular matrix A. If UPLO = 'U', the + leading N-by-N upper triangular part of the array A contains + the upper triangular matrix, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of the array A contains + the lower triangular matrix, and the strictly upper + triangular part of A is not referenced. If DIAG = 'U', the + diagonal elements of A are also not referenced and are + assumed to be 1. + On exit, the (triangular) inverse of the original matrix, in + the same storage format. -/* - Solve the triangular system: - (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. 
-*/ + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k + k * t_dim1; - i__3 = k + k * t_dim1; - i__4 = ki + ki * t_dim1; - z__1.r = t[i__3].r - t[i__4].r, z__1.i = t[i__3].i - t[i__4] - .i; - t[i__2].r = z__1.r, t[i__2].i = z__1.i; - i__2 = k + k * t_dim1; - if ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * - t_dim1]), abs(d__2)) < smin) { - i__3 = k + k * t_dim1; - t[i__3].r = smin, t[i__3].i = 0.; - } -/* L50: */ - } + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, A(i,i) is exactly zero. The triangular + matrix is singular and its inverse can not be computed. - if (ki > 1) { - i__1 = ki - 1; - zlatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[ - t_offset], ldt, &work[1], &scale, &rwork[1], info); - i__1 = ki; - work[i__1].r = scale, work[i__1].i = 0.; - } + ===================================================================== -/* Copy the vector x or Q*x to VR and normalize. */ - if (! over) { - zcopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1); + Test the input parameters. +*/ - ii = izamax_(&ki, &vr[is * vr_dim1 + 1], &c__1); - i__1 = ii + is * vr_dim1; - remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( - &vr[ii + is * vr_dim1]), abs(d__2))); - zdscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1); + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; - i__1 = *n; - for (k = ki + 1; k <= i__1; ++k) { - i__2 = k + is * vr_dim1; - vr[i__2].r = 0., vr[i__2].i = 0.; -/* L60: */ - } - } else { - if (ki > 1) { - i__1 = ki - 1; - z__1.r = scale, z__1.i = 0.; - zgemv_("N", n, &i__1, &c_b60, &vr[vr_offset], ldvr, &work[ - 1], &c__1, &z__1, &vr[ki * vr_dim1 + 1], &c__1); - } + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + nounit = lsame_(diag, "N"); + if (! upper && ! 
lsame_(uplo, "L")) { + *info = -1; + } else if (! nounit && ! lsame_(diag, "U")) { + *info = -2; + } else if (*n < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZTRTRI", &i__1); + return 0; + } - ii = izamax_(n, &vr[ki * vr_dim1 + 1], &c__1); - i__1 = ii + ki * vr_dim1; - remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag( - &vr[ii + ki * vr_dim1]), abs(d__2))); - zdscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1); - } +/* Quick return if possible */ -/* Set back the original diagonal elements of T. */ + if (*n == 0) { + return 0; + } - i__1 = ki - 1; - for (k = 1; k <= i__1; ++k) { - i__2 = k + k * t_dim1; - i__3 = k + *n; - t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i; -/* L70: */ - } +/* Check for singularity if non-unit. */ - --is; -L80: - ; + if (nounit) { + i__1 = *n; + for (*info = 1; *info <= i__1; ++(*info)) { + i__2 = *info + *info * a_dim1; + if (a[i__2].r == 0. && a[i__2].i == 0.) { + return 0; + } +/* L10: */ } + *info = 0; } - if (leftv) { +/* + Determine the block size for this environment. -/* Compute left eigenvectors. */ + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = uplo; + i__3[1] = 1, a__1[1] = diag; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "ZTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)2); + if (nb <= 1 || nb >= *n) { - is = 1; - i__1 = *n; - for (ki = 1; ki <= i__1; ++ki) { +/* Use unblocked code */ - if (somev) { - if (! select[ki]) { - goto L130; - } - } -/* Computing MAX */ - i__2 = ki + ki * t_dim1; - d__3 = ulp * ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[ - ki + ki * t_dim1]), abs(d__2))); - smin = max(d__3,smlnum); + ztrti2_(uplo, diag, n, &a[a_offset], lda, info); + } else { - i__2 = *n; - work[i__2].r = 1., work[i__2].i = 0.; +/* Use blocked code */ -/* Form right-hand side. 
*/ + if (upper) { - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k; - d_cnjg(&z__2, &t[ki + k * t_dim1]); - z__1.r = -z__2.r, z__1.i = -z__2.i; - work[i__3].r = z__1.r, work[i__3].i = z__1.i; -/* L90: */ - } +/* Compute inverse of upper triangular matrix */ -/* - Solve the triangular system: - (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. -*/ + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { +/* Computing MIN */ + i__4 = nb, i__5 = *n - j + 1; + jb = min(i__4,i__5); - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k + k * t_dim1; - i__4 = k + k * t_dim1; - i__5 = ki + ki * t_dim1; - z__1.r = t[i__4].r - t[i__5].r, z__1.i = t[i__4].i - t[i__5] - .i; - t[i__3].r = z__1.r, t[i__3].i = z__1.i; - i__3 = k + k * t_dim1; - if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t[k + k * - t_dim1]), abs(d__2)) < smin) { - i__4 = k + k * t_dim1; - t[i__4].r = smin, t[i__4].i = 0.; - } -/* L100: */ - } +/* Compute rows 1:j-1 of current block column */ - if (ki < *n) { - i__2 = *n - ki; - zlatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", & - i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki + - 1], &scale, &rwork[1], info); - i__2 = ki; - work[i__2].r = scale, work[i__2].i = 0.; + i__4 = j - 1; + ztrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & + c_b60, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + i__4 = j - 1; + z__1.r = -1., z__1.i = -0.; + ztrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & + z__1, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], + lda); + +/* Compute inverse of current diagonal block */ + + ztrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L20: */ } + } else { -/* Copy the vector x or Q*x to VL and normalize. */ +/* Compute inverse of lower triangular matrix */ - if (! over) { - i__2 = *n - ki + 1; - zcopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1) - ; + nn = (*n - 1) / nb * nb + 1; + i__2 = -nb; + for (j = nn; i__2 < 0 ? 
j >= 1 : j <= 1; j += i__2) { +/* Computing MIN */ + i__1 = nb, i__4 = *n - j + 1; + jb = min(i__1,i__4); + if (j + jb <= *n) { - i__2 = *n - ki + 1; - ii = izamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1; - i__2 = ii + is * vl_dim1; - remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( - &vl[ii + is * vl_dim1]), abs(d__2))); - i__2 = *n - ki + 1; - zdscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1); +/* Compute rows j+jb:n of current block column */ - i__2 = ki - 1; - for (k = 1; k <= i__2; ++k) { - i__3 = k + is * vl_dim1; - vl[i__3].r = 0., vl[i__3].i = 0.; -/* L110: */ - } - } else { - if (ki < *n) { - i__2 = *n - ki; - z__1.r = scale, z__1.i = 0.; - zgemv_("N", n, &i__2, &c_b60, &vl[(ki + 1) * vl_dim1 + 1], - ldvl, &work[ki + 1], &c__1, &z__1, &vl[ki * - vl_dim1 + 1], &c__1); + i__1 = *n - j - jb + 1; + ztrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, + &c_b60, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + + jb + j * a_dim1], lda); + i__1 = *n - j - jb + 1; + z__1.r = -1., z__1.i = -0.; + ztrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, + &z__1, &a[j + j * a_dim1], lda, &a[j + jb + j * + a_dim1], lda); } - ii = izamax_(n, &vl[ki * vl_dim1 + 1], &c__1); - i__2 = ii + ki * vl_dim1; - remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag( - &vl[ii + ki * vl_dim1]), abs(d__2))); - zdscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1); - } - -/* Set back the original diagonal elements of T. 
*/ +/* Compute inverse of current diagonal block */ - i__2 = *n; - for (k = ki + 1; k <= i__2; ++k) { - i__3 = k + k * t_dim1; - i__4 = k + *n; - t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i; -/* L120: */ + ztrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info); +/* L30: */ } - - ++is; -L130: - ; } } return 0; -/* End of ZTREVC */ +/* End of ZTRTRI */ -} /* ztrevc_ */ +} /* ztrtri_ */ /* Subroutine */ int zung2r_(integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * -- cgit v1.2.1 From b5306344ff6977ae38817915f80150ff112afca2 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Mon, 20 Feb 2017 20:38:12 +0000 Subject: TST: Add travis build to force use of lapack_lite --- .travis.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.travis.yml b/.travis.yml index ba226fcb2..cde8db23d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -72,6 +72,12 @@ matrix: env: - PYTHONOPTIMIZE=2 - USE_ASV=1 + - python: 2.7 + env: + - BLAS=None + - LAPACK=None + - ATLAS=None + before_install: - ./tools/travis-before-install.sh -- cgit v1.2.1