PROGRAM TESTSVD
DOUBLE PRECISION A(4,2),U(4,4),S(2),V(2,2)
K=1
DO I=1,4
DO J=1,2
A(I,J)=K*1.0D0
K=K+1
END DO
END DO
WRITE(*,*) 'A ='
WRITE(*,'(2F10.4)') ((A(I,J),J=1,2),I=1,4)
CALL SVD(A,U,S,V,4,2)
WRITE(*,*) 'U ='
WRITE(*,'(4F10.4)') ((U(I,J),J=1,4),I=1,4)
WRITE(*,*) 'V='
WRITE(*,'(2F10.4)') V
WRITE(*,*) 'S ='
WRITE(*,'(2F10.4)')S
END
SUBROUTINE SVD(A,U,S,V,M,N)
DOUBLE PRECISION A(M,N),U(M,M),VT(N,N),S(N),V(N,N)
DOUBLE PRECISION,ALLOCATABLE :: WORK(:)
INTEGER LDA,M,N,LWORK,LDVT,INFO
CHARACTER JOBU, JOBVT
JOBU='A'
JOBVT='A'
LDA=M
LDU=M
LDVT=N
LWORK=MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N))
ALLOCATE(work(lwork))
CALL DGESVD(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
$ WORK, LWORK, INFO )
DO I=1,2
DO J=1,2
V(J,I)=VT(I,J)
END DO
END DO
END
SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
$ LDU, C, LDC, WORK, INFO )
*
* -- LAPACK routine (version 3.2) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* January 2007
*
* .. Scalar Arguments ..
CHARACTER UPLO
INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
* ..
* .. Array Arguments ..
DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ),
$ VT( LDVT, * ), WORK( * )
* ..
*
* Purpose
* =======
*
* DBDSQR computes the singular values and, optionally, the right and/or
* left singular vectors from the singular value decomposition (SVD) of
* a real N-by-N (upper or lower) bidiagonal matrix B using the implicit
* zero-shift QR algorithm. The SVD of B has the form
*
* B = Q * S * P**T
*
* where S is the diagonal matrix of singular values, Q is an orthogonal
* matrix of left singular vectors, and P is an orthogonal matrix of
* right singular vectors. If left singular vectors are requested, this
* subroutine actually returns U*Q instead of Q, and, if right singular
* vectors are requested, this subroutine returns P**T*VT instead of
* P**T, for given real input matrices U and VT. When U and VT are the
* orthogonal matrices that reduce a general matrix A to bidiagonal
* form: A = U*B*VT, as computed by DGEBRD, then
*
* A = (U*Q) * S * (P**T*VT)
*
* is the SVD of A. Optionally, the subroutine may also compute Q**T*C
* for a given real input matrix C.
*
* See "Computing Small Singular Values of Bidiagonal Matrices With
* Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
* LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,
* no. 5, pp. 873-912, Sept 1990) and
* "Accurate singular values and differential qd algorithms," by
* B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics
* Department, University of California at Berkeley, July 1992
* for a detailed description of the algorithm.
*
* Arguments
* =========
*
* UPLO (input) CHARACTER*1
* = 'U': B is upper bidiagonal;
* = 'L': B is lower bidiagonal.
*
* N (input) INTEGER
* The order of the matrix B. N >= 0.
*
* NCVT (input) INTEGER
* The number of columns of the matrix VT. NCVT >= 0.
*
* NRU (input) INTEGER
* The number of rows of the matrix U. NRU >= 0.
*
* NCC (input) INTEGER
* The number of columns of the matrix C. NCC >= 0.
*
* D (input/output) DOUBLE PRECISION array, dimension (N)
* On entry, the n diagonal elements of the bidiagonal matrix B.
* On exit, if INFO=0, the singular values of B in decreasing
* order.
*
* E (input/output) DOUBLE PRECISION array, dimension (N-1)
* On entry, the N-1 offdiagonal elements of the bidiagonal
* matrix B.
* On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E
* will contain the diagonal and superdiagonal elements of a
* bidiagonal matrix orthogonally equivalent to the one given
* as input.
*
* VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT)
* On entry, an N-by-NCVT matrix VT.
* On exit, VT is overwritten by P**T * VT.
* Not referenced if NCVT = 0.
*
* LDVT (input) INTEGER
* The leading dimension of the array VT.
* LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.
*
* U (input/output) DOUBLE PRECISION array, dimension (LDU, N)
* On entry, an NRU-by-N matrix U.
* On exit, U is overwritten by U * Q.
* Not referenced if NRU = 0.
*
* LDU (input) INTEGER
* The leading dimension of the array U. LDU >= max(1,NRU).
*
* C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC)
* On entry, an N-by-NCC matrix C.
* On exit, C is overwritten by Q**T * C.
* Not referenced if NCC = 0.
*
* LDC (input) INTEGER
* The leading dimension of the array C.
* LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.
*
* WORK (workspace) DOUBLE PRECISION array, dimension (4*N)
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: If INFO = -i, the i-th argument had an illegal value
* > 0:
* if NCVT = NRU = NCC = 0,
* = 1, a split was marked by a positive value in E
* = 2, current block of Z not diagonalized after 30*N
* iterations (in inner while loop)
* = 3, termination criterion of outer while loop not met
* (program created more than N unreduced blocks)
* else NCVT = NRU = NCC = 0,
* the algorithm did not converge; D and E contain the
* elements of a bidiagonal matrix which is orthogonally
* similar to the input matrix B; if INFO = i, i
* elements of E have not converged to zero.
*
* Internal Parameters
* ===================
*
* TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8)))
* TOLMUL controls the convergence criterion of the QR loop.
* If it is positive, TOLMUL*EPS is the desired relative
* precision in the computed singular values.
* If it is negative, abs(TOLMUL*EPS*sigma_max) is the
* desired absolute accuracy in the computed singular
* values (corresponds to relative accuracy
* abs(TOLMUL*EPS) in the largest singular value.
* abs(TOLMUL) should be between 1 and 1/EPS, and preferably
* between 10 (for fast convergence) and .1/EPS
* (for there to be some accuracy in the results).
* Default is to lose at either one eighth or 2 of the
* available decimal digits in each computed singular value
* (whichever is smaller).
*
* MAXITR INTEGER, default = 6
* MAXITR controls the maximum number of passes of the
* algorithm through its inner loop. The algorithms stops
* (and so fails to converge) if the number of passes
* through the inner loop exceeds MAXITR*N**2.
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO
PARAMETER ( ZERO = 0.0D0 )
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D0 )
DOUBLE PRECISION NEGONE
PARAMETER ( NEGONE = -1.0D0 )
DOUBLE PRECISION HNDRTH
PARAMETER ( HNDRTH = 0.01D0 )
DOUBLE PRECISION TEN
PARAMETER ( TEN = 10.0D0 )
DOUBLE PRECISION HNDRD
PARAMETER ( HNDRD = 100.0D0 )
DOUBLE PRECISION MEIGTH
PARAMETER ( MEIGTH = -0.125D0 )
INTEGER MAXITR
PARAMETER ( MAXITR = 6 )
* ..
* .. Local Scalars ..
LOGICAL LOWER, ROTATE
INTEGER I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1,
$ NM12, NM13, OLDLL, OLDM
DOUBLE PRECISION ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU,
$ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL,
$ SINR, SLL, SMAX, SMIN, SMINL, SMINOA,
$ SN, THRESH, TOL, TOLMUL, UNFL
* ..
* .. External Functions ..
LOGICAL LSAME
DOUBLE PRECISION DLAMCH
EXTERNAL LSAME, DLAMCH
* ..
* .. External Subroutines ..
EXTERNAL DLARTG, DLAS2, DLASQ1, DLASR, DLASV2, DROT,
$ DSCAL, DSWAP, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, DBLE, MAX, MIN, SIGN, SQRT
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
INFO = 0
LOWER = LSAME( UPLO, 'L' )
IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( NCVT.LT.0 ) THEN
INFO = -3
ELSE IF( NRU.LT.0 ) THEN
INFO = -4
ELSE IF( NCC.LT.0 ) THEN
INFO = -5
ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR.
$ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN
INFO = -9
ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN
INFO = -11
ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR.
$ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN
INFO = -13
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DBDSQR', -INFO )
RETURN
END IF
IF( N.EQ.0 )
$ RETURN
IF( N.EQ.1 )
$ GO TO 160
*
* ROTATE is true if any singular vectors desired, false otherwise
*
ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 )
*
* If no singular vectors desired, use qd algorithm
*
IF( .NOT.ROTATE ) THEN
CALL DLASQ1( N, D, E, WORK, INFO )
RETURN
END IF
*
NM1 = N - 1
NM12 = NM1 + NM1
NM13 = NM12 + NM1
IDIR = 0
*
* Get machine constants
*
EPS = DLAMCH( 'Epsilon' )
UNFL = DLAMCH( 'Safe minimum' )
*
* If matrix lower bidiagonal, rotate to be upper bidiagonal
* by applying Givens rotations on the left
*
IF( LOWER ) THEN
DO 10 I = 1, N - 1
CALL DLARTG( D( I ), E( I ), CS, SN, R )
D( I ) = R
E( I ) = SN*D( I+1 )
D( I+1 ) = CS*D( I+1 )
WORK( I ) = CS
WORK( NM1+I ) = SN
10 CONTINUE
*
* Update singular vectors if desired
*
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), WORK( N ), U,
$ LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), WORK( N ), C,
$ LDC )
END IF
*
* Compute singular values to relative accuracy TOL
* (By setting TOL to be negative, algorithm will compute
* singular values to absolute accuracy ABS(TOL)*norm(input matrix))
*
TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) )
TOL = TOLMUL*EPS
*
* Compute approximate maximum, minimum singular values
*
SMAX = ZERO
DO 20 I = 1, N
SMAX = MAX( SMAX, ABS( D( I ) ) )
20 CONTINUE
DO 30 I = 1, N - 1
SMAX = MAX( SMAX, ABS( E( I ) ) )
30 CONTINUE
SMINL = ZERO
IF( TOL.GE.ZERO ) THEN
*
* Relative accuracy desired
*
SMINOA = ABS( D( 1 ) )
IF( SMINOA.EQ.ZERO )
$ GO TO 50
MU = SMINOA
DO 40 I = 2, N
MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) )
SMINOA = MIN( SMINOA, MU )
IF( SMINOA.EQ.ZERO )
$ GO TO 50
40 CONTINUE
50 CONTINUE
SMINOA = SMINOA / SQRT( DBLE( N ) )
THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL )
ELSE
*
* Absolute accuracy desired
*
THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL )
END IF
*
* Prepare for main iteration loop for the singular values
* (MAXIT is the maximum number of passes through the inner
* loop permitted before nonconvergence signalled.)
*
MAXIT = MAXITR*N*N
ITER = 0
OLDLL = -1
OLDM = -1
*
* M points to last element of unconverged part of matrix
*
M = N
*
* Begin main iteration loop
*
60 CONTINUE
*
* Check for convergence or exceeding iteration count
*
IF( M.LE.1 )
$ GO TO 160
IF( ITER.GT.MAXIT )
$ GO TO 200
*
* Find diagonal block of matrix to work on
*
IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH )
$ D( M ) = ZERO
SMAX = ABS( D( M ) )
SMIN = SMAX
DO 70 LLL = 1, M - 1
LL = M - LLL
ABSS = ABS( D( LL ) )
ABSE = ABS( E( LL ) )
IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH )
$ D( LL ) = ZERO
IF( ABSE.LE.THRESH )
$ GO TO 80
SMIN = MIN( SMIN, ABSS )
SMAX = MAX( SMAX, ABSS, ABSE )
70 CONTINUE
LL = 0
GO TO 90
80 CONTINUE
E( LL ) = ZERO
*
* Matrix splits since E(LL) = 0
*
IF( LL.EQ.M-1 ) THEN
*
* Convergence of bottom singular value, return to top of loop
*
M = M - 1
GO TO 60
END IF
90 CONTINUE
LL = LL + 1
*
* E(LL) through E(M-1) are nonzero, E(LL-1) is zero
*
IF( LL.EQ.M-1 ) THEN
*
* 2 by 2 block, handle separately
*
CALL DLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR,
$ COSR, SINL, COSL )
D( M-1 ) = SIGMX
E( M-1 ) = ZERO
D( M ) = SIGMN
*
* Compute singular vectors, if desired
*
IF( NCVT.GT.0 )
$ CALL DROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, COSR,
$ SINR )
IF( NRU.GT.0 )
$ CALL DROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL )
IF( NCC.GT.0 )
$ CALL DROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL,
$ SINL )
M = M - 2
GO TO 60
END IF
*
* If working on new submatrix, choose shift direction
* (from larger end diagonal element towards smaller)
*
IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN
IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN
*
* Chase bulge from top (big end) to bottom (small end)
*
IDIR = 1
ELSE
*
* Chase bulge from bottom (big end) to top (small end)
*
IDIR = 2
END IF
END IF
*
* Apply convergence tests
*
IF( IDIR.EQ.1 ) THEN
*
* Run convergence test in forward direction
* First apply standard test to bottom of matrix
*
IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR.
$ ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN
E( M-1 ) = ZERO
GO TO 60
END IF
*
IF( TOL.GE.ZERO ) THEN
*
* If relative accuracy desired,
* apply convergence criterion forward
*
MU = ABS( D( LL ) )
SMINL = MU
DO 100 LLL = LL, M - 1
IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
E( LLL ) = ZERO
GO TO 60
END IF
MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
SMINL = MIN( SMINL, MU )
100 CONTINUE
END IF
*
ELSE
*
* Run convergence test in backward direction
* First apply standard test to top of matrix
*
IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR.
$ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN
E( LL ) = ZERO
GO TO 60
END IF
*
IF( TOL.GE.ZERO ) THEN
*
* If relative accuracy desired,
* apply convergence criterion backward
*
MU = ABS( D( M ) )
SMINL = MU
DO 110 LLL = M - 1, LL, -1
IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
E( LLL ) = ZERO
GO TO 60
END IF
MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
SMINL = MIN( SMINL, MU )
110 CONTINUE
END IF
END IF
OLDLL = LL
OLDM = M
*
* Compute shift. First, test if shifting would ruin relative
* accuracy, and if so set the shift to zero.
*
IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE.
$ MAX( EPS, HNDRTH*TOL ) ) THEN
*
* Use a zero shift to avoid loss of relative accuracy
*
SHIFT = ZERO
ELSE
*
* Compute the shift from 2-by-2 block at end of matrix
*
IF( IDIR.EQ.1 ) THEN
SLL = ABS( D( LL ) )
CALL DLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R )
ELSE
SLL = ABS( D( M ) )
CALL DLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R )
END IF
*
* Test if shift negligible, and if so set to zero
*
IF( SLL.GT.ZERO ) THEN
IF( ( SHIFT / SLL )**2.LT.EPS )
$ SHIFT = ZERO
END IF
END IF
*
* Increment iteration count
*
ITER = ITER + M - LL
*
* If SHIFT = 0, do simplified QR iteration
*
IF( SHIFT.EQ.ZERO ) THEN
IF( IDIR.EQ.1 ) THEN
*
* Chase bulge from top to bottom
* Save cosines and sines for later singular vector updates
*
CS = ONE
OLDCS = ONE
DO 120 I = LL, M - 1
CALL DLARTG( D( I )*CS, E( I ), CS, SN, R )
IF( I.GT.LL )
$ E( I-1 ) = OLDSN*R
CALL DLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) )
WORK( I-LL+1 ) = CS
WORK( I-LL+1+NM1 ) = SN
WORK( I-LL+1+NM12 ) = OLDCS
WORK( I-LL+1+NM13 ) = OLDSN
120 CONTINUE
H = D( M )*CS
D( M ) = H*OLDCS
E( M-1 ) = H*OLDSN
*
* Update singular vectors
*
IF( NCVT.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
$ WORK( N ), VT( LL, 1 ), LDVT )
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
$ WORK( NM13+1 ), U( 1, LL ), LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
$ WORK( NM13+1 ), C( LL, 1 ), LDC )
*
* Test convergence
*
IF( ABS( E( M-1 ) ).LE.THRESH )
$ E( M-1 ) = ZERO
*
ELSE
*
* Chase bulge from bottom to top
* Save cosines and sines for later singular vector updates
*
CS = ONE
OLDCS = ONE
DO 130 I = M, LL + 1, -1
CALL DLARTG( D( I )*CS, E( I-1 ), CS, SN, R )
IF( I.LT.M )
$ E( I ) = OLDSN*R
CALL DLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) )
WORK( I-LL ) = CS
WORK( I-LL+NM1 ) = -SN
WORK( I-LL+NM12 ) = OLDCS
WORK( I-LL+NM13 ) = -OLDSN
130 CONTINUE
H = D( LL )*CS
D( LL ) = H*OLDCS
E( LL ) = H*OLDSN
*
* Update singular vectors
*
IF( NCVT.GT.0 )
$ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
$ WORK( NM13+1 ), VT( LL, 1 ), LDVT )
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
$ WORK( N ), U( 1, LL ), LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
$ WORK( N ), C( LL, 1 ), LDC )
*
* Test convergence
*
IF( ABS( E( LL ) ).LE.THRESH )
$ E( LL ) = ZERO
END IF
ELSE
*
* Use nonzero shift
*
IF( IDIR.EQ.1 ) THEN
*
* Chase bulge from top to bottom
* Save cosines and sines for later singular vector updates
*
F = ( ABS( D( LL ) )-SHIFT )*
$ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) )
G = E( LL )
DO 140 I = LL, M - 1
CALL DLARTG( F, G, COSR, SINR, R )
IF( I.GT.LL )
$ E( I-1 ) = R
F = COSR*D( I ) + SINR*E( I )
E( I ) = COSR*E( I ) - SINR*D( I )
G = SINR*D( I+1 )
D( I+1 ) = COSR*D( I+1 )
CALL DLARTG( F, G, COSL, SINL, R )
D( I ) = R
F = COSL*E( I ) + SINL*D( I+1 )
D( I+1 ) = COSL*D( I+1 ) - SINL*E( I )
IF( I.LT.M-1 ) THEN
G = SINL*E( I+1 )
E( I+1 ) = COSL*E( I+1 )
END IF
WORK( I-LL+1 ) = COSR
WORK( I-LL+1+NM1 ) = SINR
WORK( I-LL+1+NM12 ) = COSL
WORK( I-LL+1+NM13 ) = SINL
140 CONTINUE
E( M-1 ) = F
*
* Update singular vectors
*
IF( NCVT.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
$ WORK( N ), VT( LL, 1 ), LDVT )
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
$ WORK( NM13+1 ), U( 1, LL ), LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
$ WORK( NM13+1 ), C( LL, 1 ), LDC )
*
* Test convergence
*
IF( ABS( E( M-1 ) ).LE.THRESH )
$ E( M-1 ) = ZERO
*
ELSE
*
* Chase bulge from bottom to top
* Save cosines and sines for later singular vector updates
*
F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT /
$ D( M ) )
G = E( M-1 )
DO 150 I = M, LL + 1, -1
CALL DLARTG( F, G, COSR, SINR, R )
IF( I.LT.M )
$ E( I ) = R
F = COSR*D( I ) + SINR*E( I-1 )
E( I-1 ) = COSR*E( I-1 ) - SINR*D( I )
G = SINR*D( I-1 )
D( I-1 ) = COSR*D( I-1 )
CALL DLARTG( F, G, COSL, SINL, R )
D( I ) = R
F = COSL*E( I-1 ) + SINL*D( I-1 )
D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 )
IF( I.GT.LL+1 ) THEN
G = SINL*E( I-2 )
E( I-2 ) = COSL*E( I-2 )
END IF
WORK( I-LL ) = COSR
WORK( I-LL+NM1 ) = -SINR
WORK( I-LL+NM12 ) = COSL
WORK( I-LL+NM13 ) = -SINL
150 CONTINUE
E( LL ) = F
*
* Test convergence
*
IF( ABS( E( LL ) ).LE.THRESH )
$ E( LL ) = ZERO
*
* Update singular vectors if desired
*
IF( NCVT.GT.0 )
$ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
$ WORK( NM13+1 ), VT( LL, 1 ), LDVT )
IF( NRU.GT.0 )
$ CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
$ WORK( N ), U( 1, LL ), LDU )
IF( NCC.GT.0 )
$ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
$ WORK( N ), C( LL, 1 ), LDC )
END IF
END IF
*
* QR iteration finished, go back and check convergence
*
GO TO 60
*
* All singular values converged, so make them positive
*
160 CONTINUE
DO 170 I = 1, N
IF( D( I ).LT.ZERO ) THEN
D( I ) = -D( I )
*
* Change sign of singular vectors, if desired
*
IF( NCVT.GT.0 )
$ CALL DSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT )
END IF
170 CONTINUE
*
* Sort the singular values into decreasing order (insertion sort on
* singular values, but only one transposition per singular vector)
*
DO 190 I = 1, N - 1
*
* Scan for smallest D(I)
*
ISUB = 1
SMIN = D( 1 )
DO 180 J = 2, N + 1 - I
IF( D( J ).LE.SMIN ) THEN
ISUB = J
SMIN = D( J )
END IF
180 CONTINUE
IF( ISUB.NE.N+1-I ) THEN
*
* Swap singular values and vectors
*
D( ISUB ) = D( N+1-I )
D( N+1-I ) = SMIN
IF( NCVT.GT.0 )
$ CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ),
$ LDVT )
IF( NRU.GT.0 )
$ CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 )
IF( NCC.GT.0 )
$ CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC )
END IF
190 CONTINUE
GO TO 220
*
* Maximum number of iterations exceeded, failure to converge
*
200 CONTINUE
INFO = 0
DO 210 I = 1, N - 1
IF( E( I ).NE.ZERO )
$ INFO = INFO + 1
210 CONTINUE
220 CONTINUE
RETURN
*
* End of DBDSQR
*
END
SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
*
* -- LAPACK routine (version 3.3.1) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* -- April 2011 --
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
$ TAUQ( * ), WORK( * )
* ..
*
* Purpose
* =======
*
* DGEBD2 reduces a real general m by n matrix A to upper or lower
* bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
*
* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
*
* Arguments
* =========
*
* M (input) INTEGER
* The number of rows in the matrix A. M >= 0.
*
* N (input) INTEGER
* The number of columns in the matrix A. N >= 0.
*
* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
* On entry, the m by n general matrix to be reduced.
* On exit,
* if m >= n, the diagonal and the first superdiagonal are
* overwritten with the upper bidiagonal matrix B; the
* elements below the diagonal, with the array TAUQ, represent
* the orthogonal matrix Q as a product of elementary
* reflectors, and the elements above the first superdiagonal,
* with the array TAUP, represent the orthogonal matrix P as
* a product of elementary reflectors;
* if m < n, the diagonal and the first subdiagonal are
* overwritten with the lower bidiagonal matrix B; the
* elements below the first subdiagonal, with the array TAUQ,
* represent the orthogonal matrix Q as a product of
* elementary reflectors, and the elements above the diagonal,
* with the array TAUP, represent the orthogonal matrix P as
* a product of elementary reflectors.
* See Further Details.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,M).
*
* D (output) DOUBLE PRECISION array, dimension (min(M,N))
* The diagonal elements of the bidiagonal matrix B:
* D(i) = A(i,i).
*
* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
* The off-diagonal elements of the bidiagonal matrix B:
* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
*
* TAUQ (output) DOUBLE PRECISION array dimension (min(M,N))
* The scalar factors of the elementary reflectors which
* represent the orthogonal matrix Q. See Further Details.
*
* TAUP (output) DOUBLE PRECISION array, dimension (min(M,N))
* The scalar factors of the elementary reflectors which
* represent the orthogonal matrix P. See Further Details.
*
* WORK (workspace) DOUBLE PRECISION array, dimension (max(M,N))
*
* INFO (output) INTEGER
* = 0: successful exit.
* < 0: if INFO = -i, the i-th argument had an illegal value.
*
* Further Details
* ===============
*
* The matrices Q and P are represented as products of elementary
* reflectors:
*
* If m >= n,
*
* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
*
* Each H(i) and G(i) has the form:
*
* H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*
* where tauq and taup are real scalars, and v and u are real vectors;
* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
* tauq is stored in TAUQ(i) and taup in TAUP(i).
*
* If m < n,
*
* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
*
* Each H(i) and G(i) has the form:
*
* H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*
* where tauq and taup are real scalars, and v and u are real vectors;
* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
* tauq is stored in TAUQ(i) and taup in TAUP(i).
*
* The contents of A on exit are illustrated by the following examples:
*
* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
*
* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
* ( v1 v2 v3 v4 v5 )
*
* where d and e denote diagonal and off-diagonal elements of B, vi
* denotes an element of the vector defining H(i), and ui an element of
* the vector defining G(i).
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I
* ..
* .. External Subroutines ..
EXTERNAL DLARF, DLARFG, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input parameters
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.LT.0 ) THEN
CALL XERBLA( 'DGEBD2', -INFO )
RETURN
END IF
*
IF( M.GE.N ) THEN
*
* Reduce to upper bidiagonal form
*
DO 10 I = 1, N
*
* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
*
CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
$ TAUQ( I ) )
D( I ) = A( I, I )
A( I, I ) = ONE
*
* Apply H(i) to A(i:m,i+1:n) from the left
*
IF( I.LT.N )
$ CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ),
$ A( I, I+1 ), LDA, WORK )
A( I, I ) = D( I )
*
IF( I.LT.N ) THEN
*
* Generate elementary reflector G(i) to annihilate
* A(i,i+2:n)
*
CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
$ LDA, TAUP( I ) )
E( I ) = A( I, I+1 )
A( I, I+1 ) = ONE
*
* Apply G(i) to A(i+1:m,i+1:n) from the right
*
CALL DLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA,
$ TAUP( I ), A( I+1, I+1 ), LDA, WORK )
A( I, I+1 ) = E( I )
ELSE
TAUP( I ) = ZERO
END IF
10 CONTINUE
ELSE
*
* Reduce to lower bidiagonal form
*
DO 20 I = 1, M
*
* Generate elementary reflector G(i) to annihilate A(i,i+1:n)
*
CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
$ TAUP( I ) )
D( I ) = A( I, I )
A( I, I ) = ONE
*
* Apply G(i) to A(i+1:m,i:n) from the right
*
IF( I.LT.M )
$ CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA,
$ TAUP( I ), A( I+1, I ), LDA, WORK )
A( I, I ) = D( I )
*
IF( I.LT.M ) THEN
*
* Generate elementary reflector H(i) to annihilate
* A(i+2:m,i)
*
CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
$ TAUQ( I ) )
E( I ) = A( I+1, I )
A( I+1, I ) = ONE
*
* Apply H(i) to A(i+1:m,i+1:n) from the left
*
CALL DLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ),
$ A( I+1, I+1 ), LDA, WORK )
A( I+1, I ) = E( I )
ELSE
TAUQ( I ) = ZERO
END IF
20 CONTINUE
END IF
RETURN
*
* End of DGEBD2
*
END
SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
$ INFO )
*
* -- LAPACK routine (version 3.3.1) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* -- April 2011 --
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
$ TAUQ( * ), WORK( * )
* ..
*
* Purpose
* =======
*
* DGEBRD reduces a general real M-by-N matrix A to upper or lower
* bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
*
* If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
*
* Arguments
* =========
*
* M (input) INTEGER
* The number of rows in the matrix A. M >= 0.
*
* N (input) INTEGER
* The number of columns in the matrix A. N >= 0.
*
* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
* On entry, the M-by-N general matrix to be reduced.
* On exit,
* if m >= n, the diagonal and the first superdiagonal are
* overwritten with the upper bidiagonal matrix B; the
* elements below the diagonal, with the array TAUQ, represent
* the orthogonal matrix Q as a product of elementary
* reflectors, and the elements above the first superdiagonal,
* with the array TAUP, represent the orthogonal matrix P as
* a product of elementary reflectors;
* if m < n, the diagonal and the first subdiagonal are
* overwritten with the lower bidiagonal matrix B; the
* elements below the first subdiagonal, with the array TAUQ,
* represent the orthogonal matrix Q as a product of
* elementary reflectors, and the elements above the diagonal,
* with the array TAUP, represent the orthogonal matrix P as
* a product of elementary reflectors.
* See Further Details.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,M).
*
* D (output) DOUBLE PRECISION array, dimension (min(M,N))
* The diagonal elements of the bidiagonal matrix B:
* D(i) = A(i,i).
*
* E (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
* The off-diagonal elements of the bidiagonal matrix B:
* if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
* if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
*
* TAUQ (output) DOUBLE PRECISION array dimension (min(M,N))
* The scalar factors of the elementary reflectors which
* represent the orthogonal matrix Q. See Further Details.
*
* TAUP (output) DOUBLE PRECISION array, dimension (min(M,N))
* The scalar factors of the elementary reflectors which
* represent the orthogonal matrix P. See Further Details.
*
* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
* On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*
* LWORK (input) INTEGER
* The length of the array WORK. LWORK >= max(1,M,N).
* For optimum performance LWORK >= (M+N)*NB, where NB
* is the optimal blocksize.
*
* If LWORK = -1, then a workspace query is assumed; the routine
* only calculates the optimal size of the WORK array, returns
* this value as the first entry of the WORK array, and no error
* message related to LWORK is issued by XERBLA.
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -i, the i-th argument had an illegal value.
*
* Further Details
* ===============
*
* The matrices Q and P are represented as products of elementary
* reflectors:
*
* If m >= n,
*
* Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1)
*
* Each H(i) and G(i) has the form:
*
* H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*
* where tauq and taup are real scalars, and v and u are real vectors;
* v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
* u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
* tauq is stored in TAUQ(i) and taup in TAUP(i).
*
* If m < n,
*
* Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m)
*
* Each H(i) and G(i) has the form:
*
* H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*
* where tauq and taup are real scalars, and v and u are real vectors;
* v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
* u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
* tauq is stored in TAUQ(i) and taup in TAUP(i).
*
* The contents of A on exit are illustrated by the following examples:
*
* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
*
* ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
* ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
* ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
* ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
* ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
* ( v1 v2 v3 v4 v5 )
*
* where d and e denote diagonal and off-diagonal elements of B, vi
* denotes an element of the vector defining H(i), and ui an element of
* the vector defining G(i).
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB,
$ NBMIN, NX
DOUBLE PRECISION WS
* ..
* .. External Subroutines ..
EXTERNAL DGEBD2, DGEMM, DLABRD, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC DBLE, MAX, MIN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
*
* Test the input parameters
*
INFO = 0
NB = MAX( 1, ILAENV( 1, 'DGEBRD', ' ', M, N, -1, -1 ) )
LWKOPT = ( M+N )*NB
WORK( 1 ) = DBLE( LWKOPT )
LQUERY = ( LWORK.EQ.-1 )
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN
INFO = -10
END IF
IF( INFO.LT.0 ) THEN
CALL XERBLA( 'DGEBRD', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
MINMN = MIN( M, N )
IF( MINMN.EQ.0 ) THEN
WORK( 1 ) = 1
RETURN
END IF
*
WS = MAX( M, N )
LDWRKX = M
LDWRKY = N
*
IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN
*
* Set the crossover point NX.
*
NX = MAX( NB, ILAENV( 3, 'DGEBRD', ' ', M, N, -1, -1 ) )
*
* Determine when to switch from blocked to unblocked code.
*
IF( NX.LT.MINMN ) THEN
WS = ( M+N )*NB
IF( LWORK.LT.WS ) THEN
*
* Not enough work space for the optimal NB, consider using
* a smaller block size.
*
NBMIN = ILAENV( 2, 'DGEBRD', ' ', M, N, -1, -1 )
IF( LWORK.GE.( M+N )*NBMIN ) THEN
NB = LWORK / ( M+N )
ELSE
NB = 1
NX = MINMN
END IF
END IF
END IF
ELSE
NX = MINMN
END IF
*
DO 30 I = 1, MINMN - NX, NB
*
* Reduce rows and columns i:i+nb-1 to bidiagonal form and return
* the matrices X and Y which are needed to update the unreduced
* part of the matrix
*
CALL DLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ),
$ TAUQ( I ), TAUP( I ), WORK, LDWRKX,
$ WORK( LDWRKX*NB+1 ), LDWRKY )
*
* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
* of the form A := A - V*Y**T - X*U**T
*
CALL DGEMM( 'No transpose', 'Transpose', M-I-NB+1, N-I-NB+1,
$ NB, -ONE, A( I+NB, I ), LDA,
$ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE,
$ A( I+NB, I+NB ), LDA )
CALL DGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1,
$ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA,
$ ONE, A( I+NB, I+NB ), LDA )
*
* Copy diagonal and off-diagonal elements of B back into A
*
IF( M.GE.N ) THEN
DO 10 J = I, I + NB - 1
A( J, J ) = D( J )
A( J, J+1 ) = E( J )
10 CONTINUE
ELSE
DO 20 J = I, I + NB - 1
A( J, J ) = D( J )
A( J+1, J ) = E( J )
20 CONTINUE
END IF
30 CONTINUE
*
* Use unblocked code to reduce the remainder of the matrix
*
CALL DGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ),
$ TAUQ( I ), TAUP( I ), WORK, IINFO )
WORK( 1 ) = WS
RETURN
*
* End of DGEBRD
*
END
SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO )
*
* -- LAPACK routine (version 3.3.1) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* -- April 2011 --
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
* Purpose
* =======
*
* DGELQ2 computes an LQ factorization of a real m by n matrix A:
* A = L * Q.
*
* Arguments
* =========
*
* M (input) INTEGER
* The number of rows of the matrix A. M >= 0.
*
* N (input) INTEGER
* The number of columns of the matrix A. N >= 0.
*
* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
* On entry, the m by n matrix A.
* On exit, the elements on and below the diagonal of the array
* contain the m by min(m,n) lower trapezoidal matrix L (L is
* lower triangular if m <= n); the elements above the diagonal,
* with the array TAU, represent the orthogonal matrix Q as a
* product of elementary reflectors (see Further Details).
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,M).
*
* TAU (output) DOUBLE PRECISION array, dimension (min(M,N))
* The scalar factors of the elementary reflectors (see Further
* Details).
*
* WORK (workspace) DOUBLE PRECISION array, dimension (M)
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -i, the i-th argument had an illegal value
*
* Further Details
* ===============
*
* The matrix Q is represented as a product of elementary reflectors
*
* Q = H(k) . . . H(2) H(1), where k = min(m,n).
*
* Each H(i) has the form
*
* H(i) = I - tau * v * v**T
*
* where tau is a real scalar, and v is a real vector with
* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
* and tau in TAU(i).
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I, K
DOUBLE PRECISION AII
* ..
* .. External Subroutines ..
EXTERNAL DLARF, DLARFG, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGELQ2', -INFO )
RETURN
END IF
*
K = MIN( M, N )
*
DO 10 I = 1, K
*
* Generate elementary reflector H(i) to annihilate A(i,i+1:n)
*
CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
$ TAU( I ) )
IF( I.LT.M ) THEN
*
* Apply H(i) to A(i+1:m,i:n) from the right
*
AII = A( I, I )
A( I, I ) = ONE
CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ),
$ A( I+1, I ), LDA, WORK )
A( I, I ) = AII
END IF
10 CONTINUE
RETURN
*
* End of DGELQ2
*
END
SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* -- LAPACK routine (version 3.3.1) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* -- April 2011 --
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
* Purpose
* =======
*
* DGELQF computes an LQ factorization of a real M-by-N matrix A:
* A = L * Q.
*
* Arguments
* =========
*
* M (input) INTEGER
* The number of rows of the matrix A. M >= 0.
*
* N (input) INTEGER
* The number of columns of the matrix A. N >= 0.
*
* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
* On entry, the M-by-N matrix A.
* On exit, the elements on and below the diagonal of the array
* contain the m-by-min(m,n) lower trapezoidal matrix L (L is
* lower triangular if m <= n); the elements above the diagonal,
* with the array TAU, represent the orthogonal matrix Q as a
* product of elementary reflectors (see Further Details).
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,M).
*
* TAU (output) DOUBLE PRECISION array, dimension (min(M,N))
* The scalar factors of the elementary reflectors (see Further
* Details).
*
* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
* On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*
* LWORK (input) INTEGER
* The dimension of the array WORK. LWORK >= max(1,M).
* For optimum performance LWORK >= M*NB, where NB is the
* optimal blocksize.
*
* If LWORK = -1, then a workspace query is assumed; the routine
* only calculates the optimal size of the WORK array, returns
* this value as the first entry of the WORK array, and no error
* message related to LWORK is issued by XERBLA.
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -i, the i-th argument had an illegal value
*
* Further Details
* ===============
*
* The matrix Q is represented as a product of elementary reflectors
*
* Q = H(k) . . . H(2) H(1), where k = min(m,n).
*
* Each H(i) has the form
*
* H(i) = I - tau * v * v**T
*
* where tau is a real scalar, and v is a real vector with
* v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
* and tau in TAU(i).
*
* =====================================================================
*
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
$ NBMIN, NX
* ..
* .. External Subroutines ..
EXTERNAL DGELQ2, DLARFB, DLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
NB = ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
LWKOPT = M*NB
WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
INFO = -7
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGELQF', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
K = MIN( M, N )
IF( K.EQ.0 ) THEN
WORK( 1 ) = 1
RETURN
END IF
*
NBMIN = 2
NX = 0
IWS = M
IF( NB.GT.1 .AND. NB.LT.K ) THEN
*
* Determine when to cross over from blocked to unblocked code.
*
NX = MAX( 0, ILAENV( 3, 'DGELQF', ' ', M, N, -1, -1 ) )
IF( NX.LT.K ) THEN
*
* Determine if workspace is large enough for blocked code.
*
LDWORK = M
IWS = LDWORK*NB
IF( LWORK.LT.IWS ) THEN
*
* Not enough workspace to use optimal NB: reduce NB and
* determine the minimum value of NB.
*
NB = LWORK / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DGELQF', ' ', M, N, -1,
$ -1 ) )
END IF
END IF
END IF
*
IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
*
* Use blocked code initially
*
DO 10 I = 1, K - NX, NB
IB = MIN( K-I+1, NB )
*
* Compute the LQ factorization of the current block
* A(i:i+ib-1,i:n)
*
CALL DGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
$ IINFO )
IF( I+IB.LE.M ) THEN
*
* Form the triangular factor of the block reflector
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ),
$ LDA, TAU( I ), WORK, LDWORK )
*
* Apply H to A(i+ib:m,i:n) from the right
*
CALL DLARFB( 'Right', 'No transpose', 'Forward',
$ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ),
$ LDA, WORK, LDWORK, A( I+IB, I ), LDA,
$ WORK( IB+1 ), LDWORK )
END IF
10 CONTINUE
ELSE
I = 1
END IF
*
* Use unblocked code to factor the last or only block.
*
IF( I.LE.K )
$ CALL DGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
$ IINFO )
*
WORK( 1 ) = IWS
RETURN
*
* End of DGELQF
*
END
SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO )
*
* -- LAPACK routine (version 3.3.1) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* -- April 2011 --
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
* Purpose
* =======
*
* DGEQR2 computes a QR factorization of a real m by n matrix A:
* A = Q * R.
*
* Arguments
* =========
*
* M (input) INTEGER
* The number of rows of the matrix A. M >= 0.
*
* N (input) INTEGER
* The number of columns of the matrix A. N >= 0.
*
* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
* On entry, the m by n matrix A.
* On exit, the elements on and above the diagonal of the array
* contain the min(m,n) by n upper trapezoidal matrix R (R is
* upper triangular if m >= n); the elements below the diagonal,
* with the array TAU, represent the orthogonal matrix Q as a
* product of elementary reflectors (see Further Details).
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,M).
*
* TAU (output) DOUBLE PRECISION array, dimension (min(M,N))
* The scalar factors of the elementary reflectors (see Further
* Details).
*
* WORK (workspace) DOUBLE PRECISION array, dimension (N)
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -i, the i-th argument had an illegal value
*
* Further Details
* ===============
*
* The matrix Q is represented as a product of elementary reflectors
*
* Q = H(1) H(2) . . . H(k), where k = min(m,n).
*
* Each H(i) has the form
*
* H(i) = I - tau * v * v**T
*
* where tau is a real scalar, and v is a real vector with
* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
* and tau in TAU(i).
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I, K
DOUBLE PRECISION AII
* ..
* .. External Subroutines ..
EXTERNAL DLARF, DLARFG, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGEQR2', -INFO )
RETURN
END IF
*
K = MIN( M, N )
*
DO 10 I = 1, K
*
* Generate elementary reflector H(i) to annihilate A(i+1:m,i)
*
CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
$ TAU( I ) )
IF( I.LT.N ) THEN
*
* Apply H(i) to A(i:m,i+1:n) from the left
*
AII = A( I, I )
A( I, I ) = ONE
CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ),
$ A( I, I+1 ), LDA, WORK )
A( I, I ) = AII
END IF
10 CONTINUE
RETURN
*
* End of DGEQR2
*
END
SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* -- LAPACK routine (version 3.3.1) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* -- April 2011 --
*
* .. Scalar Arguments ..
INTEGER INFO, LDA, LWORK, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * )
* ..
*
* Purpose
* =======
*
* DGEQRF computes a QR factorization of a real M-by-N matrix A:
* A = Q * R.
*
* Arguments
* =========
*
* M (input) INTEGER
* The number of rows of the matrix A. M >= 0.
*
* N (input) INTEGER
* The number of columns of the matrix A. N >= 0.
*
* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
* On entry, the M-by-N matrix A.
* On exit, the elements on and above the diagonal of the array
* contain the min(M,N)-by-N upper trapezoidal matrix R (R is
* upper triangular if m >= n); the elements below the diagonal,
* with the array TAU, represent the orthogonal matrix Q as a
* product of min(m,n) elementary reflectors (see Further
* Details).
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,M).
*
* TAU (output) DOUBLE PRECISION array, dimension (min(M,N))
* The scalar factors of the elementary reflectors (see Further
* Details).
*
* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
* On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
*
* LWORK (input) INTEGER
* The dimension of the array WORK. LWORK >= max(1,N).
* For optimum performance LWORK >= N*NB, where NB is
* the optimal blocksize.
*
* If LWORK = -1, then a workspace query is assumed; the routine
* only calculates the optimal size of the WORK array, returns
* this value as the first entry of the WORK array, and no error
* message related to LWORK is issued by XERBLA.
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -i, the i-th argument had an illegal value
*
* Further Details
* ===============
*
* The matrix Q is represented as a product of elementary reflectors
*
* Q = H(1) H(2) . . . H(k), where k = min(m,n).
*
* Each H(i) has the form
*
* H(i) = I - tau * v * v**T
*
* where tau is a real scalar, and v is a real vector with
* v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
* and tau in TAU(i).
*
* =====================================================================
*
* .. Local Scalars ..
LOGICAL LQUERY
INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
$ NBMIN, NX
* ..
* .. External Subroutines ..
EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* ..
* .. External Functions ..
INTEGER ILAENV
EXTERNAL ILAENV
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
LWKOPT = N*NB
WORK( 1 ) = LWKOPT
LQUERY = ( LWORK.EQ.-1 )
IF( M.LT.0 ) THEN
INFO = -1
ELSE IF( N.LT.0 ) THEN
INFO = -2
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -4
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
INFO = -7
END IF
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGEQRF', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
K = MIN( M, N )
IF( K.EQ.0 ) THEN
WORK( 1 ) = 1
RETURN
END IF
*
NBMIN = 2
NX = 0
IWS = N
IF( NB.GT.1 .AND. NB.LT.K ) THEN
*
* Determine when to cross over from blocked to unblocked code.
*
NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) )
IF( NX.LT.K ) THEN
*
* Determine if workspace is large enough for blocked code.
*
LDWORK = N
IWS = LDWORK*NB
IF( LWORK.LT.IWS ) THEN
*
* Not enough workspace to use optimal NB: reduce NB and
* determine the minimum value of NB.
*
NB = LWORK / LDWORK
NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1,
$ -1 ) )
END IF
END IF
END IF
*
IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
*
* Use blocked code initially
*
DO 10 I = 1, K - NX, NB
IB = MIN( K-I+1, NB )
*
* Compute the QR factorization of the current block
* A(i:m,i:i+ib-1)
*
CALL DGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK,
$ IINFO )
IF( I+IB.LE.N ) THEN
*
* Form the triangular factor of the block reflector
* H = H(i) H(i+1) . . . H(i+ib-1)
*
CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB,
$ A( I, I ), LDA, TAU( I ), WORK, LDWORK )
*
* Apply H**T to A(i:m,i+ib:n) from the left
*
CALL DLARFB( 'Left', 'Transpose', 'Forward',
$ 'Columnwise', M-I+1, N-I-IB+1, IB,
$ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ),
$ LDA, WORK( IB+1 ), LDWORK )
END IF
10 CONTINUE
ELSE
I = 1
END IF
*
* Use unblocked code to factor the last or only block.
*
IF( I.LE.K )
$ CALL DGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
$ IINFO )
*
WORK( 1 ) = IWS
RETURN
*
* End of DGEQRF
*
END
SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
$ WORK, LWORK, INFO )
*
* -- LAPACK driver routine (version 3.3.1) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* -- April 2011 --
*
* .. Scalar Arguments ..
CHARACTER JOBU, JOBVT
INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
$ VT( LDVT, * ), WORK( * )
* ..
*
* Purpose
* =======
*
* DGESVD computes the singular value decomposition (SVD) of a real
* M-by-N matrix A, optionally computing the left and/or right singular
* vectors. The SVD is written
*
* A = U * SIGMA * transpose(V)
*
* where SIGMA is an M-by-N matrix which is zero except for its
* min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
* V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
* are the singular values of A; they are real and non-negative, and
* are returned in descending order. The first min(m,n) columns of
* U and V are the left and right singular vectors of A.
*
* Note that the routine returns V**T, not V.
*
* Arguments
* =========
*
* JOBU (input) CHARACTER*1
* Specifies options for computing all or part of the matrix U:
* = 'A': all M columns of U are returned in array U:
* = 'S': the first min(m,n) columns of U (the left singular
* vectors) are returned in the array U;
* = 'O': the first min(m,n) columns of U (the left singular
* vectors) are overwritten on the array A;
* = 'N': no columns of U (no left singular vectors) are
* computed.
*
* JOBVT (input) CHARACTER*1
* Specifies options for computing all or part of the matrix
* V**T:
* = 'A': all N rows of V**T are returned in the array VT;
* = 'S': the first min(m,n) rows of V**T (the right singular
* vectors) are returned in the array VT;
* = 'O': the first min(m,n) rows of V**T (the right singular
* vectors) are overwritten on the array A;
* = 'N': no rows of V**T (no right singular vectors) are
* computed.
*
* JOBVT and JOBU cannot both be 'O'.
*
* M (input) INTEGER
* The number of rows of the input matrix A. M >= 0.
*
* N (input) INTEGER
* The number of columns of the input matrix A. N >= 0.
*
* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
* On entry, the M-by-N matrix A.
* On exit,
* if JOBU = 'O', A is overwritten with the first min(m,n)
* columns of U (the left singular vectors,
* stored columnwise);
* if JOBVT = 'O', A is overwritten with the first min(m,n)
* rows of V**T (the right singular vectors,
* stored rowwise);
* if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
* are destroyed.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,M).
*
* S (output) DOUBLE PRECISION array, dimension (min(M,N))
* The singular values of A, sorted so that S(i) >= S(i+1).
*
* U (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
* (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
* If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
* if JOBU = 'S', U contains the first min(m,n) columns of U
* (the left singular vectors, stored columnwise);
* if JOBU = 'N' or 'O', U is not referenced.
*
* LDU (input) INTEGER
* The leading dimension of the array U. LDU >= 1; if
* JOBU = 'S' or 'A', LDU >= M.
*
* VT (output) DOUBLE PRECISION array, dimension (LDVT,N)
* If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
* V**T;
* if JOBVT = 'S', VT contains the first min(m,n) rows of
* V**T (the right singular vectors, stored rowwise);
* if JOBVT = 'N' or 'O', VT is not referenced.
*
* LDVT (input) INTEGER
* The leading dimension of the array VT. LDVT >= 1; if
* JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
*
* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
* On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
* if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
* superdiagonal elements of an upper bidiagonal matrix B
* whose diagonal is in S (not necessarily sorted). B
* satisfies A = U * B * VT, so it has the same singular values
* as A, and singular vectors related by U and VT.
*
* LWORK (input) INTEGER
* The dimension of the array WORK.
* LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
* - PATH 1 (M much larger than N, JOBU='N')
* - PATH 1t (N much larger than M, JOBVT='N')
* LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
* For good performance, LWORK should generally be larger.
*
* If LWORK = -1, then a workspace query is assumed; the routine
* only calculates the optimal size of the WORK array, returns
* this value as the first entry of the WORK array, and no error
* message related to LWORK is issued by XERBLA.
*
* INFO (output) INTEGER
* = 0: successful exit.
* < 0: if INFO = -i, the i-th argument had an illegal value.
* > 0: if DBDSQR did not converge, INFO specifies how many
* superdiagonals of an intermediate bidiagonal form B
* did not converge to zero. See the description of WORK
* above for details.
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
* ..
* .. Local Scalars ..
LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
$ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
$ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
$ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
$ NRVT, WRKBL
DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
* ..
* .. Local Arrays ..
DOUBLE PRECISION DUM( 1 )
* ..
* .. External Subroutines ..
EXTERNAL DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
$ DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
$ XERBLA
* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILAENV
DOUBLE PRECISION DLAMCH, DLANGE
EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN, SQRT
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
MINMN = MIN( M, N )
WNTUA = LSAME( JOBU, 'A' )
WNTUS = LSAME( JOBU, 'S' )
WNTUAS = WNTUA .OR. WNTUS
WNTUO = LSAME( JOBU, 'O' )
WNTUN = LSAME( JOBU, 'N' )
WNTVA = LSAME( JOBVT, 'A' )
WNTVS = LSAME( JOBVT, 'S' )
WNTVAS = WNTVA .OR. WNTVS
WNTVO = LSAME( JOBVT, 'O' )
WNTVN = LSAME( JOBVT, 'N' )
LQUERY = ( LWORK.EQ.-1 )
*
IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
INFO = -1
ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
$ ( WNTVO .AND. WNTUO ) ) THEN
INFO = -2
ELSE IF( M.LT.0 ) THEN
INFO = -3
ELSE IF( N.LT.0 ) THEN
INFO = -4
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -6
ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
INFO = -9
ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
$ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
INFO = -11
END IF
*
* Compute workspace
* (Note: Comments in the code beginning "Workspace:" describe the
* minimal amount of workspace needed at that point in the code,
* as well as the preferred amount for good performance.
* NB refers to the optimal block size for the immediately
* following subroutine, as returned by ILAENV.)
*
IF( INFO.EQ.0 ) THEN
MINWRK = 1
MAXWRK = 1
IF( M.GE.N .AND. MINMN.GT.0 ) THEN
*
* Compute space needed for DBDSQR
*
MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
BDSPAC = 5*N
IF( M.GE.MNTHR ) THEN
IF( WNTUN ) THEN
*
* Path 1 (M much larger than N, JOBU='N')
*
MAXWRK = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1,
$ -1 )
MAXWRK = MAX( MAXWRK, 3*N+2*N*
$ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
IF( WNTVO .OR. WNTVAS )
$ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
$ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
MAXWRK = MAX( MAXWRK, BDSPAC )
MINWRK = MAX( 4*N, BDSPAC )
ELSE IF( WNTUO .AND. WNTVN ) THEN
*
* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
*
WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
$ N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+2*N*
$ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
$ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
MINWRK = MAX( 3*N+M, BDSPAC )
ELSE IF( WNTUO .AND. WNTVAS ) THEN
*
* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
* 'A')
*
WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
$ N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+2*N*
$ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
$ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+( N-1 )*
$ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
MINWRK = MAX( 3*N+M, BDSPAC )
ELSE IF( WNTUS .AND. WNTVN ) THEN
*
* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
*
WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
$ N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+2*N*
$ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
$ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = N*N + WRKBL
MINWRK = MAX( 3*N+M, BDSPAC )
ELSE IF( WNTUS .AND. WNTVO ) THEN
*
* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
*
WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
$ N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+2*N*
$ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
$ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+( N-1 )*
$ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = 2*N*N + WRKBL
MINWRK = MAX( 3*N+M, BDSPAC )
ELSE IF( WNTUS .AND. WNTVAS ) THEN
*
* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
* 'A')
*
WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
$ N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+2*N*
$ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
$ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+( N-1 )*
$ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = N*N + WRKBL
MINWRK = MAX( 3*N+M, BDSPAC )
ELSE IF( WNTUA .AND. WNTVN ) THEN
*
* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
*
WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
$ M, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+2*N*
$ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
$ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = N*N + WRKBL
MINWRK = MAX( 3*N+M, BDSPAC )
ELSE IF( WNTUA .AND. WNTVO ) THEN
*
* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
*
WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
$ M, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+2*N*
$ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
$ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+( N-1 )*
$ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = 2*N*N + WRKBL
MINWRK = MAX( 3*N+M, BDSPAC )
ELSE IF( WNTUA .AND. WNTVAS ) THEN
*
* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
* 'A')
*
WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
$ M, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+2*N*
$ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*N+N*
$ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, 3*N+( N-1 )*
$ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = N*N + WRKBL
MINWRK = MAX( 3*N+M, BDSPAC )
END IF
ELSE
*
* Path 10 (M at least N, but not much larger)
*
MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
$ -1, -1 )
IF( WNTUS .OR. WNTUO )
$ MAXWRK = MAX( MAXWRK, 3*N+N*
$ ILAENV( 1, 'DORGBR', 'Q', M, N, N, -1 ) )
IF( WNTUA )
$ MAXWRK = MAX( MAXWRK, 3*N+M*
$ ILAENV( 1, 'DORGBR', 'Q', M, M, N, -1 ) )
IF( .NOT.WNTVN )
$ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
$ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
MAXWRK = MAX( MAXWRK, BDSPAC )
MINWRK = MAX( 3*N+M, BDSPAC )
END IF
ELSE IF( MINMN.GT.0 ) THEN
*
* Compute space needed for DBDSQR
*
MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
BDSPAC = 5*M
IF( N.GE.MNTHR ) THEN
IF( WNTVN ) THEN
*
* Path 1t(N much larger than M, JOBVT='N')
*
MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1,
$ -1 )
MAXWRK = MAX( MAXWRK, 3*M+2*M*
$ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
IF( WNTUO .OR. WNTUAS )
$ MAXWRK = MAX( MAXWRK, 3*M+M*
$ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
MAXWRK = MAX( MAXWRK, BDSPAC )
MINWRK = MAX( 4*M, BDSPAC )
ELSE IF( WNTVO .AND. WNTUN ) THEN
*
* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
*
WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
$ N, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+2*M*
$ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*M+( M-1 )*
$ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
MINWRK = MAX( 3*M+N, BDSPAC )
ELSE IF( WNTVO .AND. WNTUAS ) THEN
*
* Path 3t(N much larger than M, JOBU='S' or 'A',
* JOBVT='O')
*
WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
$ N, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+2*M*
$ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*M+( M-1 )*
$ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
$ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
MINWRK = MAX( 3*M+N, BDSPAC )
ELSE IF( WNTVS .AND. WNTUN ) THEN
*
* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
*
WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
$ N, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+2*M*
$ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*M+( M-1 )*
$ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = M*M + WRKBL
MINWRK = MAX( 3*M+N, BDSPAC )
ELSE IF( WNTVS .AND. WNTUO ) THEN
*
* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
*
WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
$ N, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+2*M*
$ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*M+( M-1 )*
$ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
$ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = 2*M*M + WRKBL
MINWRK = MAX( 3*M+N, BDSPAC )
ELSE IF( WNTVS .AND. WNTUAS ) THEN
*
* Path 6t(N much larger than M, JOBU='S' or 'A',
* JOBVT='S')
*
WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
$ N, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+2*M*
$ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*M+( M-1 )*
$ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
$ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = M*M + WRKBL
MINWRK = MAX( 3*M+N, BDSPAC )
ELSE IF( WNTVA .AND. WNTUN ) THEN
*
* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
*
WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
$ N, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+2*M*
$ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*M+( M-1 )*
$ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = M*M + WRKBL
MINWRK = MAX( 3*M+N, BDSPAC )
ELSE IF( WNTVA .AND. WNTUO ) THEN
*
* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
*
WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
$ N, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+2*M*
$ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*M+( M-1 )*
$ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
$ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = 2*M*M + WRKBL
MINWRK = MAX( 3*M+N, BDSPAC )
ELSE IF( WNTVA .AND. WNTUAS ) THEN
*
* Path 9t(N much larger than M, JOBU='S' or 'A',
* JOBVT='A')
*
WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
$ N, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+2*M*
$ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
WRKBL = MAX( WRKBL, 3*M+( M-1 )*
$ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, 3*M+M*
$ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
WRKBL = MAX( WRKBL, BDSPAC )
MAXWRK = M*M + WRKBL
MINWRK = MAX( 3*M+N, BDSPAC )
END IF
ELSE
*
* Path 10t(N greater than M, but not much larger)
*
MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
$ -1, -1 )
IF( WNTVS .OR. WNTVO )
$ MAXWRK = MAX( MAXWRK, 3*M+M*
$ ILAENV( 1, 'DORGBR', 'P', M, N, M, -1 ) )
IF( WNTVA )
$ MAXWRK = MAX( MAXWRK, 3*M+N*
$ ILAENV( 1, 'DORGBR', 'P', N, N, M, -1 ) )
IF( .NOT.WNTUN )
$ MAXWRK = MAX( MAXWRK, 3*M+( M-1 )*
$ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
MAXWRK = MAX( MAXWRK, BDSPAC )
MINWRK = MAX( 3*M+N, BDSPAC )
END IF
END IF
MAXWRK = MAX( MAXWRK, MINWRK )
WORK( 1 ) = MAXWRK
*
IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
INFO = -13
END IF
END IF
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DGESVD', -INFO )
RETURN
ELSE IF( LQUERY ) THEN
RETURN
END IF
*
* Quick return if possible
*
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
RETURN
END IF
*
* Get machine constants
*
EPS = DLAMCH( 'P' )
SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
BIGNUM = ONE / SMLNUM
*
* Scale A if max element outside range [SMLNUM,BIGNUM]
*
ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
ISCL = 0
IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
ISCL = 1
CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
ELSE IF( ANRM.GT.BIGNUM ) THEN
ISCL = 1
CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
END IF
*
IF( M.GE.N ) THEN
*
* A has at least as many rows as columns. If A has sufficiently
* more rows than columns, first reduce using the QR
* decomposition (if sufficient workspace available)
*
IF( M.GE.MNTHR ) THEN
*
IF( WNTUN ) THEN
*
* Path 1 (M much larger than N, JOBU='N')
* No left singular vectors to be computed
*
ITAU = 1
IWORK = ITAU + N
*
* Compute A=Q*R
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Zero out below R
*
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
IE = 1
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in A
* (Workspace: need 4*N, prefer 3*N+2*N*NB)
*
CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
$ IERR )
NCVT = 0
IF( WNTVO .OR. WNTVAS ) THEN
*
* If right singular vectors desired, generate P'.
* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
NCVT = N
END IF
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing right
* singular vectors of A in A if desired
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
$ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
*
* If right singular vectors desired in VT, copy them there
*
IF( WNTVAS )
$ CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
*
ELSE IF( WNTUO .AND. WNTVN ) THEN
*
* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
* N left singular vectors to be overwritten on A and
* no right singular vectors to be computed
*
IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IR = 1
IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
*
* WORK(IU) is LDA by N, WORK(IR) is LDA by N
*
LDWRKU = LDA
LDWRKR = LDA
ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
*
* WORK(IU) is LDA by N, WORK(IR) is N by N
*
LDWRKU = LDA
LDWRKR = N
ELSE
*
* WORK(IU) is LDWRKU by N, WORK(IR) is N by N
*
LDWRKU = ( LWORK-N*N-N ) / N
LDWRKR = N
END IF
ITAU = IR + LDWRKR*N
IWORK = ITAU + N
*
* Compute A=Q*R
* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy R to WORK(IR) and zero out below it
*
CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
$ LDWRKR )
*
* Generate Q in A
* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in WORK(IR)
* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
*
CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Generate left vectors bidiagonalizing R
* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
*
CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
$ WORK( ITAUQ ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of R in WORK(IR)
* (Workspace: need N*N+BDSPAC)
*
CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
$ WORK( IR ), LDWRKR, DUM, 1,
$ WORK( IWORK ), INFO )
IU = IE + N
*
* Multiply Q in A by left singular vectors of R in
* WORK(IR), storing result in WORK(IU) and copying to A
* (Workspace: need N*N+2*N, prefer N*N+M*N+N)
*
DO 10 I = 1, M, LDWRKU
CHUNK = MIN( M-I+1, LDWRKU )
CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
$ LDA, WORK( IR ), LDWRKR, ZERO,
$ WORK( IU ), LDWRKU )
CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
$ A( I, 1 ), LDA )
10 CONTINUE
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
IE = 1
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize A
* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
*
CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Generate left vectors bidiagonalizing A
* (Workspace: need 4*N, prefer 3*N+N*NB)
*
CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of A in A
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
$ A, LDA, DUM, 1, WORK( IWORK ), INFO )
*
END IF
*
ELSE IF( WNTUO .AND. WNTVAS ) THEN
*
* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
* N left singular vectors to be overwritten on A and
* N right singular vectors to be computed in VT
*
IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IR = 1
IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
*
* WORK(IU) is LDA by N and WORK(IR) is LDA by N
*
LDWRKU = LDA
LDWRKR = LDA
ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
*
* WORK(IU) is LDA by N and WORK(IR) is N by N
*
LDWRKU = LDA
LDWRKR = N
ELSE
*
* WORK(IU) is LDWRKU by N and WORK(IR) is N by N
*
LDWRKU = ( LWORK-N*N-N ) / N
LDWRKR = N
END IF
ITAU = IR + LDWRKR*N
IWORK = ITAU + N
*
* Compute A=Q*R
* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy R to VT, zeroing out below it
*
CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
IF( N.GT.1 )
$ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
$ VT( 2, 1 ), LDVT )
*
* Generate Q in A
* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in VT, copying result to WORK(IR)
* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
*
CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
*
* Generate left vectors bidiagonalizing R in WORK(IR)
* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
*
CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
$ WORK( ITAUQ ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate right vectors bidiagonalizing R in VT
* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of R in WORK(IR) and computing right
* singular vectors of R in VT
* (Workspace: need N*N+BDSPAC)
*
CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
$ WORK( IR ), LDWRKR, DUM, 1,
$ WORK( IWORK ), INFO )
IU = IE + N
*
* Multiply Q in A by left singular vectors of R in
* WORK(IR), storing result in WORK(IU) and copying to A
* (Workspace: need N*N+2*N, prefer N*N+M*N+N)
*
DO 20 I = 1, M, LDWRKU
CHUNK = MIN( M-I+1, LDWRKU )
CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
$ LDA, WORK( IR ), LDWRKR, ZERO,
$ WORK( IU ), LDWRKU )
CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
$ A( I, 1 ), LDA )
20 CONTINUE
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + N
*
* Compute A=Q*R
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy R to VT, zeroing out below it
*
CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
IF( N.GT.1 )
$ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
$ VT( 2, 1 ), LDVT )
*
* Generate Q in A
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in VT
* (Workspace: need 4*N, prefer 3*N+2*N*NB)
*
CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply Q in A by left vectors bidiagonalizing R
* (Workspace: need 3*N+M, prefer 3*N+M*NB)
*
CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
$ WORK( ITAUQ ), A, LDA, WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate right vectors bidiagonalizing R in VT
* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of A in A and computing right
* singular vectors of A in VT
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
$ A, LDA, DUM, 1, WORK( IWORK ), INFO )
*
END IF
*
ELSE IF( WNTUS ) THEN
*
IF( WNTVN ) THEN
*
* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
* N left singular vectors to be computed in U and
* no right singular vectors to be computed
*
IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IR = 1
IF( LWORK.GE.WRKBL+LDA*N ) THEN
*
* WORK(IR) is LDA by N
*
LDWRKR = LDA
ELSE
*
* WORK(IR) is N by N
*
LDWRKR = N
END IF
ITAU = IR + LDWRKR*N
IWORK = ITAU + N
*
* Compute A=Q*R
* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy R to WORK(IR), zeroing out below it
*
CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
$ LDWRKR )
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
$ WORK( IR+1 ), LDWRKR )
*
* Generate Q in A
* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in WORK(IR)
* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
*
CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
$ WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate left vectors bidiagonalizing R in WORK(IR)
* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
*
CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
$ WORK( ITAUQ ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of R in WORK(IR)
* (Workspace: need N*N+BDSPAC)
*
CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
$ 1, WORK( IR ), LDWRKR, DUM, 1,
$ WORK( IWORK ), INFO )
*
* Multiply Q in A by left singular vectors of R in
* WORK(IR), storing result in U
* (Workspace: need N*N)
*
CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
$ WORK( IR ), LDWRKR, ZERO, U, LDU )
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + N
*
* Compute A=Q*R, copying result to U
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
*
* Generate Q in U
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Zero out below R in A
*
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
$ LDA )
*
* Bidiagonalize R in A
* (Workspace: need 4*N, prefer 3*N+2*N*NB)
*
CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply Q in U by left vectors bidiagonalizing R
* (Workspace: need 3*N+M, prefer 3*N+M*NB)
*
CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
$ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of A in U
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
$ 1, U, LDU, DUM, 1, WORK( IWORK ),
$ INFO )
*
END IF
*
ELSE IF( WNTVO ) THEN
*
* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
* N left singular vectors to be computed in U and
* N right singular vectors to be overwritten on A
*
IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IU = 1
IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
*
* WORK(IU) is LDA by N and WORK(IR) is LDA by N
*
LDWRKU = LDA
IR = IU + LDWRKU*N
LDWRKR = LDA
ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
*
* WORK(IU) is LDA by N and WORK(IR) is N by N
*
LDWRKU = LDA
IR = IU + LDWRKU*N
LDWRKR = N
ELSE
*
* WORK(IU) is N by N and WORK(IR) is N by N
*
LDWRKU = N
IR = IU + LDWRKU*N
LDWRKR = N
END IF
ITAU = IR + LDWRKR*N
IWORK = ITAU + N
*
* Compute A=Q*R
* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy R to WORK(IU), zeroing out below it
*
CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
$ LDWRKU )
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
$ WORK( IU+1 ), LDWRKU )
*
* Generate Q in A
* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
*
CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in WORK(IU), copying result to
* WORK(IR)
* (Workspace: need 2*N*N+4*N,
* prefer 2*N*N+3*N+2*N*NB)
*
CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
$ WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
$ WORK( IR ), LDWRKR )
*
* Generate left bidiagonalizing vectors in WORK(IU)
* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
*
CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
$ WORK( ITAUQ ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate right bidiagonalizing vectors in WORK(IR)
* (Workspace: need 2*N*N+4*N-1,
* prefer 2*N*N+3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of R in WORK(IU) and computing
* right singular vectors of R in WORK(IR)
* (Workspace: need 2*N*N+BDSPAC)
*
CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
$ WORK( IR ), LDWRKR, WORK( IU ),
$ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
*
* Multiply Q in A by left singular vectors of R in
* WORK(IU), storing result in U
* (Workspace: need N*N)
*
CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
$ WORK( IU ), LDWRKU, ZERO, U, LDU )
*
* Copy right singular vectors of R to A
* (Workspace: need N*N)
*
CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
$ LDA )
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + N
*
* Compute A=Q*R, copying result to U
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
*
* Generate Q in U
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Zero out below R in A
*
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
$ LDA )
*
* Bidiagonalize R in A
* (Workspace: need 4*N, prefer 3*N+2*N*NB)
*
CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply Q in U by left vectors bidiagonalizing R
* (Workspace: need 3*N+M, prefer 3*N+M*NB)
*
CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
$ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate right vectors bidiagonalizing R in A
* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of A in U and computing right
* singular vectors of A in A
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
$ LDA, U, LDU, DUM, 1, WORK( IWORK ),
$ INFO )
*
END IF
*
ELSE IF( WNTVAS ) THEN
*
* Path 6 (M much larger than N, JOBU='S', JOBVT='S'
* or 'A')
* N left singular vectors to be computed in U and
* N right singular vectors to be computed in VT
*
IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IU = 1
IF( LWORK.GE.WRKBL+LDA*N ) THEN
*
* WORK(IU) is LDA by N
*
LDWRKU = LDA
ELSE
*
* WORK(IU) is N by N
*
LDWRKU = N
END IF
ITAU = IU + LDWRKU*N
IWORK = ITAU + N
*
* Compute A=Q*R
* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy R to WORK(IU), zeroing out below it
*
CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
$ LDWRKU )
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
$ WORK( IU+1 ), LDWRKU )
*
* Generate Q in A
* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in WORK(IU), copying result to VT
* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
*
CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
$ WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
$ LDVT )
*
* Generate left bidiagonalizing vectors in WORK(IU)
* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
*
CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
$ WORK( ITAUQ ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate right bidiagonalizing vectors in VT
* (Workspace: need N*N+4*N-1,
* prefer N*N+3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of R in WORK(IU) and computing
* right singular vectors of R in VT
* (Workspace: need N*N+BDSPAC)
*
CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
$ LDVT, WORK( IU ), LDWRKU, DUM, 1,
$ WORK( IWORK ), INFO )
*
* Multiply Q in A by left singular vectors of R in
* WORK(IU), storing result in U
* (Workspace: need N*N)
*
CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
$ WORK( IU ), LDWRKU, ZERO, U, LDU )
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + N
*
* Compute A=Q*R, copying result to U
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
*
* Generate Q in U
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy R to VT, zeroing out below it
*
CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
IF( N.GT.1 )
$ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
$ VT( 2, 1 ), LDVT )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in VT
* (Workspace: need 4*N, prefer 3*N+2*N*NB)
*
CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply Q in U by left bidiagonalizing vectors
* in VT
* (Workspace: need 3*N+M, prefer 3*N+M*NB)
*
CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
$ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate right bidiagonalizing vectors in VT
* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of A in U and computing right
* singular vectors of A in VT
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
$ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
$ INFO )
*
END IF
*
END IF
*
ELSE IF( WNTUA ) THEN
*
IF( WNTVN ) THEN
*
* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
* M left singular vectors to be computed in U and
* no right singular vectors to be computed
*
IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IR = 1
IF( LWORK.GE.WRKBL+LDA*N ) THEN
*
* WORK(IR) is LDA by N
*
LDWRKR = LDA
ELSE
*
* WORK(IR) is N by N
*
LDWRKR = N
END IF
ITAU = IR + LDWRKR*N
IWORK = ITAU + N
*
* Compute A=Q*R, copying result to U
* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
*
* Copy R to WORK(IR), zeroing out below it
*
CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
$ LDWRKR )
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
$ WORK( IR+1 ), LDWRKR )
*
* Generate Q in U
* (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
*
CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in WORK(IR)
* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
*
CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
$ WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate left bidiagonalizing vectors in WORK(IR)
* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
*
CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
$ WORK( ITAUQ ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of R in WORK(IR)
* (Workspace: need N*N+BDSPAC)
*
CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
$ 1, WORK( IR ), LDWRKR, DUM, 1,
$ WORK( IWORK ), INFO )
*
* Multiply Q in U by left singular vectors of R in
* WORK(IR), storing result in A
* (Workspace: need N*N)
*
CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
$ WORK( IR ), LDWRKR, ZERO, A, LDA )
*
* Copy left singular vectors of A from A to U
*
CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + N
*
* Compute A=Q*R, copying result to U
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
*
* Generate Q in U
* (Workspace: need N+M, prefer N+M*NB)
*
CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Zero out below R in A
*
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
$ LDA )
*
* Bidiagonalize R in A
* (Workspace: need 4*N, prefer 3*N+2*N*NB)
*
CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply Q in U by left bidiagonalizing vectors
* in A
* (Workspace: need 3*N+M, prefer 3*N+M*NB)
*
CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
$ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of A in U
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
$ 1, U, LDU, DUM, 1, WORK( IWORK ),
$ INFO )
*
END IF
*
ELSE IF( WNTVO ) THEN
*
* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
* M left singular vectors to be computed in U and
* N right singular vectors to be overwritten on A
*
IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IU = 1
IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
*
* WORK(IU) is LDA by N and WORK(IR) is LDA by N
*
LDWRKU = LDA
IR = IU + LDWRKU*N
LDWRKR = LDA
ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
*
* WORK(IU) is LDA by N and WORK(IR) is N by N
*
LDWRKU = LDA
IR = IU + LDWRKU*N
LDWRKR = N
ELSE
*
* WORK(IU) is N by N and WORK(IR) is N by N
*
LDWRKU = N
IR = IU + LDWRKU*N
LDWRKR = N
END IF
ITAU = IR + LDWRKR*N
IWORK = ITAU + N
*
* Compute A=Q*R, copying result to U
* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
*
* Generate Q in U
* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
*
CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy R to WORK(IU), zeroing out below it
*
CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
$ LDWRKU )
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
$ WORK( IU+1 ), LDWRKU )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in WORK(IU), copying result to
* WORK(IR)
* (Workspace: need 2*N*N+4*N,
* prefer 2*N*N+3*N+2*N*NB)
*
CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
$ WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
$ WORK( IR ), LDWRKR )
*
* Generate left bidiagonalizing vectors in WORK(IU)
* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
*
CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
$ WORK( ITAUQ ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate right bidiagonalizing vectors in WORK(IR)
* (Workspace: need 2*N*N+4*N-1,
* prefer 2*N*N+3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of R in WORK(IU) and computing
* right singular vectors of R in WORK(IR)
* (Workspace: need 2*N*N+BDSPAC)
*
CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
$ WORK( IR ), LDWRKR, WORK( IU ),
$ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
*
* Multiply Q in U by left singular vectors of R in
* WORK(IU), storing result in A
* (Workspace: need N*N)
*
CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
$ WORK( IU ), LDWRKU, ZERO, A, LDA )
*
* Copy left singular vectors of A from A to U
*
CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
*
* Copy right singular vectors of R from WORK(IR) to A
*
CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
$ LDA )
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + N
*
* Compute A=Q*R, copying result to U
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
*
* Generate Q in U
* (Workspace: need N+M, prefer N+M*NB)
*
CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Zero out below R in A
*
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
$ LDA )
*
* Bidiagonalize R in A
* (Workspace: need 4*N, prefer 3*N+2*N*NB)
*
CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply Q in U by left bidiagonalizing vectors
* in A
* (Workspace: need 3*N+M, prefer 3*N+M*NB)
*
CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
$ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate right bidiagonalizing vectors in A
* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of A in U and computing right
* singular vectors of A in A
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
$ LDA, U, LDU, DUM, 1, WORK( IWORK ),
$ INFO )
*
END IF
*
ELSE IF( WNTVAS ) THEN
*
* Path 9 (M much larger than N, JOBU='A', JOBVT='S'
* or 'A')
* M left singular vectors to be computed in U and
* N right singular vectors to be computed in VT
*
IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IU = 1
IF( LWORK.GE.WRKBL+LDA*N ) THEN
*
* WORK(IU) is LDA by N
*
LDWRKU = LDA
ELSE
*
* WORK(IU) is N by N
*
LDWRKU = N
END IF
ITAU = IU + LDWRKU*N
IWORK = ITAU + N
*
* Compute A=Q*R, copying result to U
* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
*
* Generate Q in U
* (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
*
CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy R to WORK(IU), zeroing out below it
*
CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
$ LDWRKU )
CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
$ WORK( IU+1 ), LDWRKU )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in WORK(IU), copying result to VT
* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
*
CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
$ WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
$ LDVT )
*
* Generate left bidiagonalizing vectors in WORK(IU)
* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
*
CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
$ WORK( ITAUQ ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate right bidiagonalizing vectors in VT
* (Workspace: need N*N+4*N-1,
* prefer N*N+3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of R in WORK(IU) and computing
* right singular vectors of R in VT
* (Workspace: need N*N+BDSPAC)
*
CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
$ LDVT, WORK( IU ), LDWRKU, DUM, 1,
$ WORK( IWORK ), INFO )
*
* Multiply Q in U by left singular vectors of R in
* WORK(IU), storing result in A
* (Workspace: need N*N)
*
CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
$ WORK( IU ), LDWRKU, ZERO, A, LDA )
*
* Copy left singular vectors of A from A to U
*
CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + N
*
* Compute A=Q*R, copying result to U
* (Workspace: need 2*N, prefer N+N*NB)
*
CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
*
* Generate Q in U
* (Workspace: need N+M, prefer N+M*NB)
*
CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy R from A to VT, zeroing out below it
*
CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
IF( N.GT.1 )
$ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
$ VT( 2, 1 ), LDVT )
IE = ITAU
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize R in VT
* (Workspace: need 4*N, prefer 3*N+2*N*NB)
*
CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply Q in U by left bidiagonalizing vectors
* in VT
* (Workspace: need 3*N+M, prefer 3*N+M*NB)
*
CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
$ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate right bidiagonalizing vectors in VT
* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + N
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of A in U and computing right
* singular vectors of A in VT
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
$ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
$ INFO )
*
END IF
*
END IF
*
END IF
*
ELSE
*
* M .LT. MNTHR
*
* Path 10 (M at least N, but not much larger)
* Reduce to bidiagonal form without QR decomposition
*
IE = 1
ITAUQ = IE + N
ITAUP = ITAUQ + N
IWORK = ITAUP + N
*
* Bidiagonalize A
* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
*
CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
$ IERR )
IF( WNTUAS ) THEN
*
* If left singular vectors desired in U, copy result to U
* and generate left bidiagonalizing vectors in U
* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
*
CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
IF( WNTUS )
$ NCU = N
IF( WNTUA )
$ NCU = M
CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
END IF
IF( WNTVAS ) THEN
*
* If right singular vectors desired in VT, copy result to
* VT and generate right bidiagonalizing vectors in VT
* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
*
CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
END IF
IF( WNTUO ) THEN
*
* If left singular vectors desired in A, generate left
* bidiagonalizing vectors in A
* (Workspace: need 4*N, prefer 3*N+N*NB)
*
CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
END IF
IF( WNTVO ) THEN
*
* If right singular vectors desired in A, generate right
* bidiagonalizing vectors in A
* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
*
CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
END IF
IWORK = IE + N
IF( WNTUAS .OR. WNTUO )
$ NRU = M
IF( WNTUN )
$ NRU = 0
IF( WNTVAS .OR. WNTVO )
$ NCVT = N
IF( WNTVN )
$ NCVT = 0
IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
*
* Perform bidiagonal QR iteration, if desired, computing
* left singular vectors in U and computing right singular
* vectors in VT
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
$ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
*
* Perform bidiagonal QR iteration, if desired, computing
* left singular vectors in U and computing right singular
* vectors in A
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
$ U, LDU, DUM, 1, WORK( IWORK ), INFO )
ELSE
*
* Perform bidiagonal QR iteration, if desired, computing
* left singular vectors in A and computing right singular
* vectors in VT
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
$ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
END IF
*
END IF
*
ELSE
*
* A has more columns than rows. If A has sufficiently more
* columns than rows, first reduce using the LQ decomposition (if
* sufficient workspace available)
*
IF( N.GE.MNTHR ) THEN
*
IF( WNTVN ) THEN
*
* Path 1t(N much larger than M, JOBVT='N')
* No right singular vectors to be computed
*
ITAU = 1
IWORK = ITAU + M
*
* Compute A=L*Q
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Zero out above L
*
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
IE = 1
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in A
* (Workspace: need 4*M, prefer 3*M+2*M*NB)
*
CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
$ IERR )
IF( WNTUO .OR. WNTUAS ) THEN
*
* If left singular vectors desired, generate Q
* (Workspace: need 4*M, prefer 3*M+M*NB)
*
CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
END IF
IWORK = IE + M
NRU = 0
IF( WNTUO .OR. WNTUAS )
$ NRU = M
*
* Perform bidiagonal QR iteration, computing left singular
* vectors of A in A if desired
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
$ LDA, DUM, 1, WORK( IWORK ), INFO )
*
* If left singular vectors desired in U, copy them there
*
IF( WNTUAS )
$ CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
*
ELSE IF( WNTVO .AND. WNTUN ) THEN
*
* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
* M right singular vectors to be overwritten on A and
* no left singular vectors to be computed
*
IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IR = 1
IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
*
* WORK(IU) is LDA by N and WORK(IR) is LDA by M
*
LDWRKU = LDA
CHUNK = N
LDWRKR = LDA
ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
*
* WORK(IU) is LDA by N and WORK(IR) is M by M
*
LDWRKU = LDA
CHUNK = N
LDWRKR = M
ELSE
*
* WORK(IU) is M by CHUNK and WORK(IR) is M by M
*
LDWRKU = M
CHUNK = ( LWORK-M*M-M ) / M
LDWRKR = M
END IF
ITAU = IR + LDWRKR*M
IWORK = ITAU + M
*
* Compute A=L*Q
* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy L to WORK(IR) and zero out above it
*
CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
$ WORK( IR+LDWRKR ), LDWRKR )
*
* Generate Q in A
* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in WORK(IR)
* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
*
CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Generate right vectors bidiagonalizing L
* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
*
CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing right
* singular vectors of L in WORK(IR)
* (Workspace: need M*M+BDSPAC)
*
CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
$ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
$ WORK( IWORK ), INFO )
IU = IE + M
*
* Multiply right singular vectors of L in WORK(IR) by Q
* in A, storing result in WORK(IU) and copying to A
* (Workspace: need M*M+2*M, prefer M*M+M*N+M)
*
DO 30 I = 1, N, CHUNK
BLK = MIN( N-I+1, CHUNK )
CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
$ LDWRKR, A( 1, I ), LDA, ZERO,
$ WORK( IU ), LDWRKU )
CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
$ A( 1, I ), LDA )
30 CONTINUE
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
IE = 1
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize A
* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
*
CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Generate right vectors bidiagonalizing A
* (Workspace: need 4*M, prefer 3*M+M*NB)
*
CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing right
* singular vectors of A in A
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
$ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
*
END IF
*
ELSE IF( WNTVO .AND. WNTUAS ) THEN
*
* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
* M right singular vectors to be overwritten on A and
* M left singular vectors to be computed in U
*
IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IR = 1
IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
*
* WORK(IU) is LDA by N and WORK(IR) is LDA by M
*
LDWRKU = LDA
CHUNK = N
LDWRKR = LDA
ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
*
* WORK(IU) is LDA by N and WORK(IR) is M by M
*
LDWRKU = LDA
CHUNK = N
LDWRKR = M
ELSE
*
* WORK(IU) is M by CHUNK and WORK(IR) is M by M
*
LDWRKU = M
CHUNK = ( LWORK-M*M-M ) / M
LDWRKR = M
END IF
ITAU = IR + LDWRKR*M
IWORK = ITAU + M
*
* Compute A=L*Q
* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy L to U, zeroing about above it
*
CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
$ LDU )
*
* Generate Q in A
* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in U, copying result to WORK(IR)
* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
*
CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
*
* Generate right vectors bidiagonalizing L in WORK(IR)
* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
*
CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate left vectors bidiagonalizing L in U
* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
*
CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of L in U, and computing right
* singular vectors of L in WORK(IR)
* (Workspace: need M*M+BDSPAC)
*
CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
$ WORK( IR ), LDWRKR, U, LDU, DUM, 1,
$ WORK( IWORK ), INFO )
IU = IE + M
*
* Multiply right singular vectors of L in WORK(IR) by Q
* in A, storing result in WORK(IU) and copying to A
* (Workspace: need M*M+2*M, prefer M*M+M*N+M))
*
DO 40 I = 1, N, CHUNK
BLK = MIN( N-I+1, CHUNK )
CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
$ LDWRKR, A( 1, I ), LDA, ZERO,
$ WORK( IU ), LDWRKU )
CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
$ A( 1, I ), LDA )
40 CONTINUE
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + M
*
* Compute A=L*Q
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy L to U, zeroing out above it
*
CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
$ LDU )
*
* Generate Q in A
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in U
* (Workspace: need 4*M, prefer 3*M+2*M*NB)
*
CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply right vectors bidiagonalizing L by Q in A
* (Workspace: need 3*M+N, prefer 3*M+N*NB)
*
CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
$ WORK( ITAUP ), A, LDA, WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate left vectors bidiagonalizing L in U
* (Workspace: need 4*M, prefer 3*M+M*NB)
*
CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of A in U and computing right
* singular vectors of A in A
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
$ U, LDU, DUM, 1, WORK( IWORK ), INFO )
*
END IF
*
ELSE IF( WNTVS ) THEN
*
IF( WNTUN ) THEN
*
* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
* M right singular vectors to be computed in VT and
* no left singular vectors to be computed
*
IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IR = 1
IF( LWORK.GE.WRKBL+LDA*M ) THEN
*
* WORK(IR) is LDA by M
*
LDWRKR = LDA
ELSE
*
* WORK(IR) is M by M
*
LDWRKR = M
END IF
ITAU = IR + LDWRKR*M
IWORK = ITAU + M
*
* Compute A=L*Q
* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy L to WORK(IR), zeroing out above it
*
CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
$ LDWRKR )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
$ WORK( IR+LDWRKR ), LDWRKR )
*
* Generate Q in A
* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in WORK(IR)
* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
*
CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
$ WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate right vectors bidiagonalizing L in
* WORK(IR)
* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
*
CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing right
* singular vectors of L in WORK(IR)
* (Workspace: need M*M+BDSPAC)
*
CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
$ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
$ WORK( IWORK ), INFO )
*
* Multiply right singular vectors of L in WORK(IR) by
* Q in A, storing result in VT
* (Workspace: need M*M)
*
CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
$ LDWRKR, A, LDA, ZERO, VT, LDVT )
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + M
*
* Compute A=L*Q
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy result to VT
*
CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
*
* Generate Q in VT
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Zero out above L in A
*
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
$ LDA )
*
* Bidiagonalize L in A
* (Workspace: need 4*M, prefer 3*M+2*M*NB)
*
CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply right vectors bidiagonalizing L by Q in VT
* (Workspace: need 3*M+N, prefer 3*M+N*NB)
*
CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
$ WORK( ITAUP ), VT, LDVT,
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing right
* singular vectors of A in VT
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
$ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
$ INFO )
*
END IF
*
ELSE IF( WNTUO ) THEN
*
* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
* M right singular vectors to be computed in VT and
* M left singular vectors to be overwritten on A
*
IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IU = 1
IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
*
* WORK(IU) is LDA by M and WORK(IR) is LDA by M
*
LDWRKU = LDA
IR = IU + LDWRKU*M
LDWRKR = LDA
ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
*
* WORK(IU) is LDA by M and WORK(IR) is M by M
*
LDWRKU = LDA
IR = IU + LDWRKU*M
LDWRKR = M
ELSE
*
* WORK(IU) is M by M and WORK(IR) is M by M
*
LDWRKU = M
IR = IU + LDWRKU*M
LDWRKR = M
END IF
ITAU = IR + LDWRKR*M
IWORK = ITAU + M
*
* Compute A=L*Q
* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy L to WORK(IU), zeroing out below it
*
CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
$ LDWRKU )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
$ WORK( IU+LDWRKU ), LDWRKU )
*
* Generate Q in A
* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
*
CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in WORK(IU), copying result to
* WORK(IR)
* (Workspace: need 2*M*M+4*M,
* prefer 2*M*M+3*M+2*M*NB)
*
CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
$ WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
$ WORK( IR ), LDWRKR )
*
* Generate right bidiagonalizing vectors in WORK(IU)
* (Workspace: need 2*M*M+4*M-1,
* prefer 2*M*M+3*M+(M-1)*NB)
*
CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate left bidiagonalizing vectors in WORK(IR)
* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
*
CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
$ WORK( ITAUQ ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of L in WORK(IR) and computing
* right singular vectors of L in WORK(IU)
* (Workspace: need 2*M*M+BDSPAC)
*
CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
$ WORK( IU ), LDWRKU, WORK( IR ),
$ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
*
* Multiply right singular vectors of L in WORK(IU) by
* Q in A, storing result in VT
* (Workspace: need M*M)
*
CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
$ LDWRKU, A, LDA, ZERO, VT, LDVT )
*
* Copy left singular vectors of L to A
* (Workspace: need M*M)
*
CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
$ LDA )
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + M
*
* Compute A=L*Q, copying result to VT
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
*
* Generate Q in VT
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Zero out above L in A
*
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
$ LDA )
*
* Bidiagonalize L in A
* (Workspace: need 4*M, prefer 3*M+2*M*NB)
*
CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply right vectors bidiagonalizing L by Q in VT
* (Workspace: need 3*M+N, prefer 3*M+N*NB)
*
CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
$ WORK( ITAUP ), VT, LDVT,
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Generate left bidiagonalizing vectors of L in A
* (Workspace: need 4*M, prefer 3*M+M*NB)
*
CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, compute left
* singular vectors of A in A and compute right
* singular vectors of A in VT
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
$ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
$ INFO )
*
END IF
*
ELSE IF( WNTUAS ) THEN
*
* Path 6t(N much larger than M, JOBU='S' or 'A',
* JOBVT='S')
* M right singular vectors to be computed in VT and
* M left singular vectors to be computed in U
*
IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IU = 1
IF( LWORK.GE.WRKBL+LDA*M ) THEN
*
* WORK(IU) is LDA by N
*
LDWRKU = LDA
ELSE
*
* WORK(IU) is LDA by M
*
LDWRKU = M
END IF
ITAU = IU + LDWRKU*M
IWORK = ITAU + M
*
* Compute A=L*Q
* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy L to WORK(IU), zeroing out above it
*
CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
$ LDWRKU )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
$ WORK( IU+LDWRKU ), LDWRKU )
*
* Generate Q in A
* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in WORK(IU), copying result to U
* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
*
CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
$ WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
$ LDU )
*
* Generate right bidiagonalizing vectors in WORK(IU)
* (Workspace: need M*M+4*M-1,
* prefer M*M+3*M+(M-1)*NB)
*
CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate left bidiagonalizing vectors in U
* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
*
CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of L in U and computing right
* singular vectors of L in WORK(IU)
* (Workspace: need M*M+BDSPAC)
*
CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
$ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
$ WORK( IWORK ), INFO )
*
* Multiply right singular vectors of L in WORK(IU) by
* Q in A, storing result in VT
* (Workspace: need M*M)
*
CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
$ LDWRKU, A, LDA, ZERO, VT, LDVT )
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + M
*
* Compute A=L*Q, copying result to VT
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
*
* Generate Q in VT
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy L to U, zeroing out above it
*
CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
$ LDU )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in U
* (Workspace: need 4*M, prefer 3*M+2*M*NB)
*
CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply right bidiagonalizing vectors in U by Q
* in VT
* (Workspace: need 3*M+N, prefer 3*M+N*NB)
*
CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
$ WORK( ITAUP ), VT, LDVT,
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Generate left bidiagonalizing vectors in U
* (Workspace: need 4*M, prefer 3*M+M*NB)
*
CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of A in U and computing right
* singular vectors of A in VT
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
$ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
$ INFO )
*
END IF
*
END IF
*
ELSE IF( WNTVA ) THEN
*
IF( WNTUN ) THEN
*
* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
* N right singular vectors to be computed in VT and
* no left singular vectors to be computed
*
IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IR = 1
IF( LWORK.GE.WRKBL+LDA*M ) THEN
*
* WORK(IR) is LDA by M
*
LDWRKR = LDA
ELSE
*
* WORK(IR) is M by M
*
LDWRKR = M
END IF
ITAU = IR + LDWRKR*M
IWORK = ITAU + M
*
* Compute A=L*Q, copying result to VT
* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
*
* Copy L to WORK(IR), zeroing out above it
*
CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
$ LDWRKR )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
$ WORK( IR+LDWRKR ), LDWRKR )
*
* Generate Q in VT
* (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
*
CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in WORK(IR)
* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
*
CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
$ WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate right bidiagonalizing vectors in WORK(IR)
* (Workspace: need M*M+4*M-1,
* prefer M*M+3*M+(M-1)*NB)
*
CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing right
* singular vectors of L in WORK(IR)
* (Workspace: need M*M+BDSPAC)
*
CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
$ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
$ WORK( IWORK ), INFO )
*
* Multiply right singular vectors of L in WORK(IR) by
* Q in VT, storing result in A
* (Workspace: need M*M)
*
CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
$ LDWRKR, VT, LDVT, ZERO, A, LDA )
*
* Copy right singular vectors of A from A to VT
*
CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + M
*
* Compute A=L*Q, copying result to VT
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
*
* Generate Q in VT
* (Workspace: need M+N, prefer M+N*NB)
*
CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Zero out above L in A
*
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
$ LDA )
*
* Bidiagonalize L in A
* (Workspace: need 4*M, prefer 3*M+2*M*NB)
*
CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply right bidiagonalizing vectors in A by Q
* in VT
* (Workspace: need 3*M+N, prefer 3*M+N*NB)
*
CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
$ WORK( ITAUP ), VT, LDVT,
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing right
* singular vectors of A in VT
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
$ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
$ INFO )
*
END IF
*
ELSE IF( WNTUO ) THEN
*
* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
* N right singular vectors to be computed in VT and
* M left singular vectors to be overwritten on A
*
IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IU = 1
IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
*
* WORK(IU) is LDA by M and WORK(IR) is LDA by M
*
LDWRKU = LDA
IR = IU + LDWRKU*M
LDWRKR = LDA
ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
*
* WORK(IU) is LDA by M and WORK(IR) is M by M
*
LDWRKU = LDA
IR = IU + LDWRKU*M
LDWRKR = M
ELSE
*
* WORK(IU) is M by M and WORK(IR) is M by M
*
LDWRKU = M
IR = IU + LDWRKU*M
LDWRKR = M
END IF
ITAU = IR + LDWRKR*M
IWORK = ITAU + M
*
* Compute A=L*Q, copying result to VT
* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
*
* Generate Q in VT
* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
*
CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy L to WORK(IU), zeroing out above it
*
CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
$ LDWRKU )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
$ WORK( IU+LDWRKU ), LDWRKU )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in WORK(IU), copying result to
* WORK(IR)
* (Workspace: need 2*M*M+4*M,
* prefer 2*M*M+3*M+2*M*NB)
*
CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
$ WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
$ WORK( IR ), LDWRKR )
*
* Generate right bidiagonalizing vectors in WORK(IU)
* (Workspace: need 2*M*M+4*M-1,
* prefer 2*M*M+3*M+(M-1)*NB)
*
CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate left bidiagonalizing vectors in WORK(IR)
* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
*
CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
$ WORK( ITAUQ ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of L in WORK(IR) and computing
* right singular vectors of L in WORK(IU)
* (Workspace: need 2*M*M+BDSPAC)
*
CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
$ WORK( IU ), LDWRKU, WORK( IR ),
$ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
*
* Multiply right singular vectors of L in WORK(IU) by
* Q in VT, storing result in A
* (Workspace: need M*M)
*
CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
$ LDWRKU, VT, LDVT, ZERO, A, LDA )
*
* Copy right singular vectors of A from A to VT
*
CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
*
* Copy left singular vectors of A from WORK(IR) to A
*
CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
$ LDA )
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + M
*
* Compute A=L*Q, copying result to VT
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
*
* Generate Q in VT
* (Workspace: need M+N, prefer M+N*NB)
*
CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Zero out above L in A
*
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
$ LDA )
*
* Bidiagonalize L in A
* (Workspace: need 4*M, prefer 3*M+2*M*NB)
*
CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply right bidiagonalizing vectors in A by Q
* in VT
* (Workspace: need 3*M+N, prefer 3*M+N*NB)
*
CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
$ WORK( ITAUP ), VT, LDVT,
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Generate left bidiagonalizing vectors in A
* (Workspace: need 4*M, prefer 3*M+M*NB)
*
CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of A in A and computing right
* singular vectors of A in VT
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
$ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
$ INFO )
*
END IF
*
ELSE IF( WNTUAS ) THEN
*
* Path 9t(N much larger than M, JOBU='S' or 'A',
* JOBVT='A')
* N right singular vectors to be computed in VT and
* M left singular vectors to be computed in U
*
IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
*
* Sufficient workspace for a fast algorithm
*
IU = 1
IF( LWORK.GE.WRKBL+LDA*M ) THEN
*
* WORK(IU) is LDA by M
*
LDWRKU = LDA
ELSE
*
* WORK(IU) is M by M
*
LDWRKU = M
END IF
ITAU = IU + LDWRKU*M
IWORK = ITAU + M
*
* Compute A=L*Q, copying result to VT
* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
*
* Generate Q in VT
* (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
*
CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy L to WORK(IU), zeroing out above it
*
CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
$ LDWRKU )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
$ WORK( IU+LDWRKU ), LDWRKU )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in WORK(IU), copying result to U
* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
*
CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
$ WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
$ LDU )
*
* Generate right bidiagonalizing vectors in WORK(IU)
* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
*
CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
$ WORK( ITAUP ), WORK( IWORK ),
$ LWORK-IWORK+1, IERR )
*
* Generate left bidiagonalizing vectors in U
* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
*
CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of L in U and computing right
* singular vectors of L in WORK(IU)
* (Workspace: need M*M+BDSPAC)
*
CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
$ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
$ WORK( IWORK ), INFO )
*
* Multiply right singular vectors of L in WORK(IU) by
* Q in VT, storing result in A
* (Workspace: need M*M)
*
CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
$ LDWRKU, VT, LDVT, ZERO, A, LDA )
*
* Copy right singular vectors of A from A to VT
*
CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
*
ELSE
*
* Insufficient workspace for a fast algorithm
*
ITAU = 1
IWORK = ITAU + M
*
* Compute A=L*Q, copying result to VT
* (Workspace: need 2*M, prefer M+M*NB)
*
CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
*
* Generate Q in VT
* (Workspace: need M+N, prefer M+N*NB)
*
CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Copy L to U, zeroing out above it
*
CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
$ LDU )
IE = ITAU
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize L in U
* (Workspace: need 4*M, prefer 3*M+2*M*NB)
*
CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
$ WORK( ITAUQ ), WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Multiply right bidiagonalizing vectors in U by Q
* in VT
* (Workspace: need 3*M+N, prefer 3*M+N*NB)
*
CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
$ WORK( ITAUP ), VT, LDVT,
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
*
* Generate left bidiagonalizing vectors in U
* (Workspace: need 4*M, prefer 3*M+M*NB)
*
CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
IWORK = IE + M
*
* Perform bidiagonal QR iteration, computing left
* singular vectors of A in U and computing right
* singular vectors of A in VT
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
$ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
$ INFO )
*
END IF
*
END IF
*
END IF
*
ELSE
*
* N .LT. MNTHR
*
* Path 10t(N greater than M, but not much larger)
* Reduce to bidiagonal form without LQ decomposition
*
IE = 1
ITAUQ = IE + M
ITAUP = ITAUQ + M
IWORK = ITAUP + M
*
* Bidiagonalize A
* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
*
CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
$ IERR )
IF( WNTUAS ) THEN
*
* If left singular vectors desired in U, copy result to U
* and generate left bidiagonalizing vectors in U
* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
*
CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
END IF
IF( WNTVAS ) THEN
*
* If right singular vectors desired in VT, copy result to
* VT and generate right bidiagonalizing vectors in VT
* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
*
CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
IF( WNTVA )
$ NRVT = N
IF( WNTVS )
$ NRVT = M
CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
END IF
IF( WNTUO ) THEN
*
* If left singular vectors desired in A, generate left
* bidiagonalizing vectors in A
* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
*
CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
END IF
IF( WNTVO ) THEN
*
* If right singular vectors desired in A, generate right
* bidiagonalizing vectors in A
* (Workspace: need 4*M, prefer 3*M+M*NB)
*
CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
END IF
IWORK = IE + M
IF( WNTUAS .OR. WNTUO )
$ NRU = M
IF( WNTUN )
$ NRU = 0
IF( WNTVAS .OR. WNTVO )
$ NCVT = N
IF( WNTVN )
$ NCVT = 0
IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
*
* Perform bidiagonal QR iteration, if desired, computing
* left singular vectors in U and computing right singular
* vectors in VT
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
$ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
*
* Perform bidiagonal QR iteration, if desired, computing
* left singular vectors in U and computing right singular
* vectors in A
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
$ U, LDU, DUM, 1, WORK( IWORK ), INFO )
ELSE
*
* Perform bidiagonal QR iteration, if desired, computing
* left singular vectors in A and computing right singular
* vectors in VT
* (Workspace: need BDSPAC)
*
CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
$ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
END IF
*
END IF
*
END IF
*
* If DBDSQR failed to converge, copy unconverged superdiagonals
* to WORK( 2:MINMN )
*
IF( INFO.NE.0 ) THEN
IF( IE.GT.2 ) THEN
DO 50 I = 1, MINMN - 1
WORK( I+1 ) = WORK( I+IE-1 )
50 CONTINUE
END IF
IF( IE.LT.2 ) THEN
DO 60 I = MINMN - 1, 1, -1
WORK( I+1 ) = WORK( I+IE-1 )
60 CONTINUE
END IF
END IF
*
* Undo scaling if necessary
*
IF( ISCL.EQ.1 ) THEN
IF( ANRM.GT.BIGNUM )
$ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
$ IERR )
IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
$ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
$ MINMN, IERR )
IF( ANRM.LT.SMLNUM )
$ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
$ IERR )
IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
$ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
$ MINMN, IERR )
END IF
*
* Return optimal workspace in WORK(1)
*
WORK( 1 ) = MAXWRK
*
RETURN
*
* End of DGESVD
*
END
LOGICAL FUNCTION DISNAN( DIN )
*
* -- LAPACK auxiliary routine (version 3.2.2) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* June 2010
*
* .. Scalar Arguments ..
DOUBLE PRECISION DIN
* ..
*
* Purpose
* =======
*
* DISNAN returns .TRUE. if its argument is NaN, and .FALSE.
* otherwise. To be replaced by the Fortran 2003 intrinsic in the
* future.
*
* Arguments
* =========
*
* DIN (input) DOUBLE PRECISION
* Input to test for NaN.
*
* =====================================================================
*
* .. External Functions ..
LOGICAL DLAISNAN
EXTERNAL DLAISNAN
* ..
* .. Executable Statements ..
DISNAN = DLAISNAN(DIN,DIN)
RETURN
END
SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
$ LDY )
*
* -- LAPACK auxiliary routine (version 3.3.1) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* -- April 2011 --
*
* .. Scalar Arguments ..
INTEGER LDA, LDX, LDY, M, N, NB
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ),
$ TAUQ( * ), X( LDX, * ), Y( LDY, * )
* ..
*
* Purpose
* =======
*
* DLABRD reduces the first NB rows and columns of a real general
* m by n matrix A to upper or lower bidiagonal form by an orthogonal
* transformation Q**T * A * P, and returns the matrices X and Y which
* are needed to apply the transformation to the unreduced part of A.
*
* If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
* bidiagonal form.
*
* This is an auxiliary routine called by DGEBRD
*
* Arguments
* =========
*
* M (input) INTEGER
* The number of rows in the matrix A.
*
* N (input) INTEGER
* The number of columns in the matrix A.
*
* NB (input) INTEGER
* The number of leading rows and columns of A to be reduced.
*
* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
* On entry, the m by n general matrix to be reduced.
* On exit, the first NB rows and columns of the matrix are
* overwritten; the rest of the array is unchanged.
* If m >= n, elements on and below the diagonal in the first NB
* columns, with the array TAUQ, represent the orthogonal
* matrix Q as a product of elementary reflectors; and
* elements above the diagonal in the first NB rows, with the
* array TAUP, represent the orthogonal matrix P as a product
* of elementary reflectors.
* If m < n, elements below the diagonal in the first NB
* columns, with the array TAUQ, represent the orthogonal
* matrix Q as a product of elementary reflectors, and
* elements on and above the diagonal in the first NB rows,
* with the array TAUP, represent the orthogonal matrix P as
* a product of elementary reflectors.
* See Further Details.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,M).
*
* D (output) DOUBLE PRECISION array, dimension (NB)
* The diagonal elements of the first NB rows and columns of
* the reduced matrix. D(i) = A(i,i).
*
* E (output) DOUBLE PRECISION array, dimension (NB)
* The off-diagonal elements of the first NB rows and columns of
* the reduced matrix.
*
* TAUQ (output) DOUBLE PRECISION array dimension (NB)
* The scalar factors of the elementary reflectors which
* represent the orthogonal matrix Q. See Further Details.
*
* TAUP (output) DOUBLE PRECISION array, dimension (NB)
* The scalar factors of the elementary reflectors which
* represent the orthogonal matrix P. See Further Details.
*
* X (output) DOUBLE PRECISION array, dimension (LDX,NB)
* The m-by-nb matrix X required to update the unreduced part
* of A.
*
* LDX (input) INTEGER
* The leading dimension of the array X. LDX >= max(1,M).
*
* Y (output) DOUBLE PRECISION array, dimension (LDY,NB)
* The n-by-nb matrix Y required to update the unreduced part
* of A.
*
* LDY (input) INTEGER
* The leading dimension of the array Y. LDY >= max(1,N).
*
* Further Details
* ===============
*
* The matrices Q and P are represented as products of elementary
* reflectors:
*
* Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb)
*
* Each H(i) and G(i) has the form:
*
* H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T
*
* where tauq and taup are real scalars, and v and u are real vectors.
*
* If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
* A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
*
* If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
* A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
* A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
*
* The elements of the vectors v and u together form the m-by-nb matrix
* V and the nb-by-n matrix U**T which are needed, with X and Y, to apply
* the transformation to the unreduced part of the matrix, using a block
* update of the form: A := A - V*Y**T - X*U**T.
*
* The contents of A on exit are illustrated by the following examples
* with nb = 2:
*
* m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
*
* ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 )
* ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 )
* ( v1 v2 a a a ) ( v1 1 a a a a )
* ( v1 v2 a a a ) ( v1 v2 a a a a )
* ( v1 v2 a a a ) ( v1 v2 a a a a )
* ( v1 v2 a a a )
*
* where a denotes an element of the original matrix which is unchanged,
* vi denotes an element of the vector defining H(i), and ui an element
* of the vector defining G(i).
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
* ..
* .. Local Scalars ..
INTEGER I
* ..
* .. External Subroutines ..
EXTERNAL DGEMV, DLARFG, DSCAL
* ..
* .. Intrinsic Functions ..
INTRINSIC MIN
* ..
* .. Executable Statements ..
*
* Quick return if possible
*
IF( M.LE.0 .OR. N.LE.0 )
$ RETURN
*
IF( M.GE.N ) THEN
*
* Reduce to upper bidiagonal form
*
DO 10 I = 1, NB
*
* Update A(i:m,i)
*
CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ),
$ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 )
CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ),
$ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 )
*
* Generate reflection Q(i) to annihilate A(i+1:m,i)
*
CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
$ TAUQ( I ) )
D( I ) = A( I, I )
IF( I.LT.N ) THEN
A( I, I ) = ONE
*
* Compute Y(i+1:n,i)
*
CALL DGEMV( 'Transpose', M-I+1, N-I, ONE, A( I, I+1 ),
$ LDA, A( I, I ), 1, ZERO, Y( I+1, I ), 1 )
CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, A( I, 1 ), LDA,
$ A( I, I ), 1, ZERO, Y( 1, I ), 1 )
CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
$ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, X( I, 1 ), LDX,
$ A( I, I ), 1, ZERO, Y( 1, I ), 1 )
CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
$ LDA, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
*
* Update A(i,i+1:n)
*
CALL DGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ),
$ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA )
CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
$ LDA, X( I, 1 ), LDX, ONE, A( I, I+1 ), LDA )
*
* Generate reflection P(i) to annihilate A(i,i+2:n)
*
CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
$ LDA, TAUP( I ) )
E( I ) = A( I, I+1 )
A( I, I+1 ) = ONE
*
* Compute X(i+1:m,i)
*
CALL DGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ),
$ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 )
CALL DGEMV( 'Transpose', N-I, I, ONE, Y( I+1, 1 ), LDY,
$ A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ),
$ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
CALL DGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ),
$ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
$ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
END IF
10 CONTINUE
ELSE
*
* Reduce to lower bidiagonal form
*
DO 20 I = 1, NB
*
* Update A(i,i:n)
*
CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ),
$ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA )
CALL DGEMV( 'Transpose', I-1, N-I+1, -ONE, A( 1, I ), LDA,
$ X( I, 1 ), LDX, ONE, A( I, I ), LDA )
*
* Generate reflection P(i) to annihilate A(i,i+1:n)
*
CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
$ TAUP( I ) )
D( I ) = A( I, I )
IF( I.LT.M ) THEN
A( I, I ) = ONE
*
* Compute X(i+1:m,i)
*
CALL DGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ),
$ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 )
CALL DGEMV( 'Transpose', N-I+1, I-1, ONE, Y( I, 1 ), LDY,
$ A( I, I ), LDA, ZERO, X( 1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
$ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
CALL DGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ),
$ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
$ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
*
* Update A(i+1:m,i)
*
CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
$ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 )
CALL DGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ),
$ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 )
*
* Generate reflection Q(i) to annihilate A(i+2:m,i)
*
CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
$ TAUQ( I ) )
E( I ) = A( I+1, I )
A( I+1, I ) = ONE
*
* Compute Y(i+1:n,i)
*
CALL DGEMV( 'Transpose', M-I, N-I, ONE, A( I+1, I+1 ),
$ LDA, A( I+1, I ), 1, ZERO, Y( I+1, I ), 1 )
CALL DGEMV( 'Transpose', M-I, I-1, ONE, A( I+1, 1 ), LDA,
$ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
$ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
CALL DGEMV( 'Transpose', M-I, I, ONE, X( I+1, 1 ), LDX,
$ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
CALL DGEMV( 'Transpose', I, N-I, -ONE, A( 1, I+1 ), LDA,
$ Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
END IF
20 CONTINUE
END IF
RETURN
*
* End of DLABRD
*
END
SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
*
* -- LAPACK auxiliary routine (version 3.2) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2006
*
* .. Scalar Arguments ..
CHARACTER UPLO
INTEGER LDA, LDB, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), B( LDB, * )
* ..
*
* Purpose
* =======
*
* DLACPY copies all or part of a two-dimensional matrix A to another
* matrix B.
*
* Arguments
* =========
*
* UPLO (input) CHARACTER*1
* Specifies the part of the matrix A to be copied to B.
* = 'U': Upper triangular part
* = 'L': Lower triangular part
* Otherwise: All of the matrix A
*
* M (input) INTEGER
* The number of rows of the matrix A. M >= 0.
*
* N (input) INTEGER
* The number of columns of the matrix A. N >= 0.
*
* A (input) DOUBLE PRECISION array, dimension (LDA,N)
* The m by n matrix A. If UPLO = 'U', only the upper triangle
* or trapezoid is accessed; if UPLO = 'L', only the lower
* triangle or trapezoid is accessed.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,M).
*
* B (output) DOUBLE PRECISION array, dimension (LDB,N)
* On exit, B = A in the locations specified by UPLO.
*
* LDB (input) INTEGER
* The leading dimension of the array B. LDB >= max(1,M).
*
* =====================================================================
*
* .. Local Scalars ..
INTEGER I, J
* ..
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. Intrinsic Functions ..
INTRINSIC MIN
* ..
* .. Executable Statements ..
*
IF( LSAME( UPLO, 'U' ) ) THEN
DO 20 J = 1, N
DO 10 I = 1, MIN( J, M )
B( I, J ) = A( I, J )
10 CONTINUE
20 CONTINUE
ELSE IF( LSAME( UPLO, 'L' ) ) THEN
DO 40 J = 1, N
DO 30 I = J, M
B( I, J ) = A( I, J )
30 CONTINUE
40 CONTINUE
ELSE
DO 60 J = 1, N
DO 50 I = 1, M
B( I, J ) = A( I, J )
50 CONTINUE
60 CONTINUE
END IF
RETURN
*
* End of DLACPY
*
END
LOGICAL FUNCTION DLAISNAN( DIN1, DIN2 )
*
* -- LAPACK auxiliary routine (version 3.2.2) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* June 2010
*
* .. Scalar Arguments ..
DOUBLE PRECISION DIN1, DIN2
* ..
*
* Purpose
* =======
*
* This routine is not for general use. It exists solely to avoid
* over-optimization in DISNAN.
*
* DLAISNAN checks for NaNs by comparing its two arguments for
* inequality. NaN is the only floating-point value where NaN != NaN
* returns .TRUE. To check for NaNs, pass the same variable as both
* arguments.
*
* A compiler must assume that the two arguments are
* not the same variable, and the test will not be optimized away.
* Interprocedural or whole-program optimization may delete this
* test. The ISNAN functions will be replaced by the correct
* Fortran 03 intrinsic once the intrinsic is widely available.
*
* Arguments
* =========
*
* DIN1 (input) DOUBLE PRECISION
*
* DIN2 (input) DOUBLE PRECISION
* Two numbers to compare for inequality.
*
* =====================================================================
*
* .. Executable Statements ..
DLAISNAN = (DIN1.NE.DIN2)
RETURN
END
DOUBLE PRECISION FUNCTION DLANGE( NORM, M, N, A, LDA, WORK )
*
* -- LAPACK auxiliary routine (version 3.2) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2006
*
* .. Scalar Arguments ..
CHARACTER NORM
INTEGER LDA, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), WORK( * )
* ..
*
* Purpose
* =======
*
* DLANGE returns the value of the one norm, or the Frobenius norm, or
* the infinity norm, or the element of largest absolute value of a
* real matrix A.
*
* Description
* ===========
*
* DLANGE returns the value
*
* DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
* (
* ( norm1(A), NORM = '1', 'O' or 'o'
* (
* ( normI(A), NORM = 'I' or 'i'
* (
* ( normF(A), NORM = 'F', 'f', 'E' or 'e'
*
* where norm1 denotes the one norm of a matrix (maximum column sum),
* normI denotes the infinity norm of a matrix (maximum row sum) and
* normF denotes the Frobenius norm of a matrix (square root of sum of
* squares). Note that max(abs(A(i,j))) is not a consistent matrix norm.
*
* Arguments
* =========
*
* NORM (input) CHARACTER*1
* Specifies the value to be returned in DLANGE as described
* above.
*
* M (input) INTEGER
* The number of rows of the matrix A. M >= 0. When M = 0,
* DLANGE is set to zero.
*
* N (input) INTEGER
* The number of columns of the matrix A. N >= 0. When N = 0,
* DLANGE is set to zero.
*
* A (input) DOUBLE PRECISION array, dimension (LDA,N)
* The m by n matrix A.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(M,1).
*
* WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)),
* where LWORK >= M when NORM = 'I'; otherwise, WORK is not
* referenced.
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I, J
DOUBLE PRECISION SCALE, SUM, VALUE
* ..
* .. External Subroutines ..
EXTERNAL DLASSQ
* ..
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN, SQRT
* ..
* .. Executable Statements ..
*
IF( MIN( M, N ).EQ.0 ) THEN
VALUE = ZERO
ELSE IF( LSAME( NORM, 'M' ) ) THEN
*
* Find max(abs(A(i,j))).
*
VALUE = ZERO
DO 20 J = 1, N
DO 10 I = 1, M
VALUE = MAX( VALUE, ABS( A( I, J ) ) )
10 CONTINUE
20 CONTINUE
ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
*
* Find norm1(A).
*
VALUE = ZERO
DO 40 J = 1, N
SUM = ZERO
DO 30 I = 1, M
SUM = SUM + ABS( A( I, J ) )
30 CONTINUE
VALUE = MAX( VALUE, SUM )
40 CONTINUE
ELSE IF( LSAME( NORM, 'I' ) ) THEN
*
* Find normI(A).
*
DO 50 I = 1, M
WORK( I ) = ZERO
50 CONTINUE
DO 70 J = 1, N
DO 60 I = 1, M
WORK( I ) = WORK( I ) + ABS( A( I, J ) )
60 CONTINUE
70 CONTINUE
VALUE = ZERO
DO 80 I = 1, M
VALUE = MAX( VALUE, WORK( I ) )
80 CONTINUE
ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
*
* Find normF(A).
*
SCALE = ZERO
SUM = ONE
DO 90 J = 1, N
CALL DLASSQ( M, A( 1, J ), 1, SCALE, SUM )
90 CONTINUE
VALUE = SCALE*SQRT( SUM )
END IF
*
DLANGE = VALUE
RETURN
*
* End of DLANGE
*
END
DOUBLE PRECISION FUNCTION DLAPY2( X, Y )
*
* -- LAPACK auxiliary routine (version 3.2) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2006
*
* .. Scalar Arguments ..
DOUBLE PRECISION X, Y
* ..
*
* Purpose
* =======
*
* DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary
* overflow.
*
* Arguments
* =========
*
* X (input) DOUBLE PRECISION
* Y (input) DOUBLE PRECISION
* X and Y specify the values x and y.
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO
PARAMETER ( ZERO = 0.0D0 )
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D0 )
* ..
* .. Local Scalars ..
DOUBLE PRECISION W, XABS, YABS, Z
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN, SQRT
* ..
* .. Executable Statements ..
*
XABS = ABS( X )
YABS = ABS( Y )
W = MAX( XABS, YABS )
Z = MIN( XABS, YABS )
IF( Z.EQ.ZERO ) THEN
DLAPY2 = W
ELSE
DLAPY2 = W*SQRT( ONE+( Z / W )**2 )
END IF
RETURN
*
* End of DLAPY2
*
END
SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK )
IMPLICIT NONE
*
* -- LAPACK auxiliary routine (version 3.3.1) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* -- April 2011 --
*
* .. Scalar Arguments ..
CHARACTER SIDE
INTEGER INCV, LDC, M, N
DOUBLE PRECISION TAU
* ..
* .. Array Arguments ..
DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * )
* ..
*
* Purpose
* =======
*
* DLARF applies a real elementary reflector H to a real m by n matrix
* C, from either the left or the right. H is represented in the form
*
* H = I - tau * v * v**T
*
* where tau is a real scalar and v is a real vector.
*
* If tau = 0, then H is taken to be the unit matrix.
*
* Arguments
* =========
*
* SIDE (input) CHARACTER*1
* = 'L': form H * C
* = 'R': form C * H
*
* M (input) INTEGER
* The number of rows of the matrix C.
*
* N (input) INTEGER
* The number of columns of the matrix C.
*
* V (input) DOUBLE PRECISION array, dimension
* (1 + (M-1)*abs(INCV)) if SIDE = 'L'
* or (1 + (N-1)*abs(INCV)) if SIDE = 'R'
* The vector v in the representation of H. V is not used if
* TAU = 0.
*
* INCV (input) INTEGER
* The increment between elements of v. INCV <> 0.
*
* TAU (input) DOUBLE PRECISION
* The value tau in the representation of H.
*
* C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
* On entry, the m by n matrix C.
* On exit, C is overwritten by the matrix H * C if SIDE = 'L',
* or C * H if SIDE = 'R'.
*
* LDC (input) INTEGER
* The leading dimension of the array C. LDC >= max(1,M).
*
* WORK (workspace) DOUBLE PRECISION array, dimension
* (N) if SIDE = 'L'
* or (M) if SIDE = 'R'
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
* ..
* .. Local Scalars ..
LOGICAL APPLYLEFT
INTEGER I, LASTV, LASTC
* ..
* .. External Subroutines ..
EXTERNAL DGEMV, DGER
* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILADLR, ILADLC
EXTERNAL LSAME, ILADLR, ILADLC
* ..
* .. Executable Statements ..
*
APPLYLEFT = LSAME( SIDE, 'L' )
LASTV = 0
LASTC = 0
IF( TAU.NE.ZERO ) THEN
! Set up variables for scanning V. LASTV begins pointing to the end
! of V.
IF( APPLYLEFT ) THEN
LASTV = M
ELSE
LASTV = N
END IF
IF( INCV.GT.0 ) THEN
I = 1 + (LASTV-1) * INCV
ELSE
I = 1
END IF
! Look for the last non-zero row in V.
DO WHILE( LASTV.GT.0 .AND. V( I ).EQ.ZERO )
LASTV = LASTV - 1
I = I - INCV
END DO
IF( APPLYLEFT ) THEN
! Scan for the last non-zero column in C(1:lastv,:).
LASTC = ILADLC(LASTV, N, C, LDC)
ELSE
! Scan for the last non-zero row in C(:,1:lastv).
LASTC = ILADLR(M, LASTV, C, LDC)
END IF
END IF
! Note that lastc.eq.0 renders the BLAS operations null; no special
! case is needed at this level.
IF( APPLYLEFT ) THEN
*
* Form H * C
*
IF( LASTV.GT.0 ) THEN
*
* w(1:lastc,1) := C(1:lastv,1:lastc)**T * v(1:lastv,1)
*
CALL DGEMV( 'Transpose', LASTV, LASTC, ONE, C, LDC, V, INCV,
$ ZERO, WORK, 1 )
*
* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)**T
*
CALL DGER( LASTV, LASTC, -TAU, V, INCV, WORK, 1, C, LDC )
END IF
ELSE
*
* Form C * H
*
IF( LASTV.GT.0 ) THEN
*
* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1)
*
CALL DGEMV( 'No transpose', LASTC, LASTV, ONE, C, LDC,
$ V, INCV, ZERO, WORK, 1 )
*
* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)**T
*
CALL DGER( LASTC, LASTV, -TAU, WORK, 1, V, INCV, C, LDC )
END IF
END IF
RETURN
*
* End of DLARF
*
END
SUBROUTINE DLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV,
$ T, LDT, C, LDC, WORK, LDWORK )
IMPLICIT NONE
*
* -- LAPACK auxiliary routine (version 3.3.1) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* -- April 2011 --
*
* .. Scalar Arguments ..
CHARACTER DIRECT, SIDE, STOREV, TRANS
INTEGER K, LDC, LDT, LDV, LDWORK, M, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION C( LDC, * ), T( LDT, * ), V( LDV, * ),
$ WORK( LDWORK, * )
* ..
*
* Purpose
* =======
*
* DLARFB applies a real block reflector H or its transpose H**T to a
* real m by n matrix C, from either the left or the right.
*
* Arguments
* =========
*
* SIDE (input) CHARACTER*1
* = 'L': apply H or H**T from the Left
* = 'R': apply H or H**T from the Right
*
* TRANS (input) CHARACTER*1
* = 'N': apply H (No transpose)
* = 'T': apply H**T (Transpose)
*
* DIRECT (input) CHARACTER*1
* Indicates how H is formed from a product of elementary
* reflectors
* = 'F': H = H(1) H(2) . . . H(k) (Forward)
* = 'B': H = H(k) . . . H(2) H(1) (Backward)
*
* STOREV (input) CHARACTER*1
* Indicates how the vectors which define the elementary
* reflectors are stored:
* = 'C': Columnwise
* = 'R': Rowwise
*
* M (input) INTEGER
* The number of rows of the matrix C.
*
* N (input) INTEGER
* The number of columns of the matrix C.
*
* K (input) INTEGER
* The order of the matrix T (= the number of elementary
* reflectors whose product defines the block reflector).
*
* V (input) DOUBLE PRECISION array, dimension
* (LDV,K) if STOREV = 'C'
* (LDV,M) if STOREV = 'R' and SIDE = 'L'
* (LDV,N) if STOREV = 'R' and SIDE = 'R'
* The matrix V. See Further Details.
*
* LDV (input) INTEGER
* The leading dimension of the array V.
* If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
* if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
* if STOREV = 'R', LDV >= K.
*
* T (input) DOUBLE PRECISION array, dimension (LDT,K)
* The triangular k by k matrix T in the representation of the
* block reflector.
*
* LDT (input) INTEGER
* The leading dimension of the array T. LDT >= K.
*
* C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
* On entry, the m by n matrix C.
* On exit, C is overwritten by H*C or H**T*C or C*H or C*H**T.
*
* LDC (input) INTEGER
* The leading dimension of the array C. LDC >= max(1,M).
*
* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,K)
*
* LDWORK (input) INTEGER
* The leading dimension of the array WORK.
* If SIDE = 'L', LDWORK >= max(1,N);
* if SIDE = 'R', LDWORK >= max(1,M).
*
* Further Details
* ===============
*
* The shape of the matrix V and the storage of the vectors which define
* the H(i) is best illustrated by the following example with n = 5 and
* k = 3. The elements equal to 1 are not stored; the corresponding
* array elements are modified but restored on exit. The rest of the
* array is not used.
*
* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
*
* V = ( 1 ) V = ( 1 v1 v1 v1 v1 )
* ( v1 1 ) ( 1 v2 v2 v2 )
* ( v1 v2 1 ) ( 1 v3 v3 )
* ( v1 v2 v3 )
* ( v1 v2 v3 )
*
* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
*
* V = ( v1 v2 v3 ) V = ( v1 v1 1 )
* ( v1 v2 v3 ) ( v2 v2 v2 1 )
* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
* ( 1 v3 )
* ( 1 )
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
CHARACTER TRANST
INTEGER I, J, LASTV, LASTC
* ..
* .. External Functions ..
LOGICAL LSAME
INTEGER ILADLR, ILADLC
EXTERNAL LSAME, ILADLR, ILADLC
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DGEMM, DTRMM
* ..
* .. Executable Statements ..
*
* Quick return if possible
*
IF( M.LE.0 .OR. N.LE.0 )
$ RETURN
*
IF( LSAME( TRANS, 'N' ) ) THEN
TRANST = 'T'
ELSE
TRANST = 'N'
END IF
*
IF( LSAME( STOREV, 'C' ) ) THEN
*
IF( LSAME( DIRECT, 'F' ) ) THEN
*
* Let V = ( V1 ) (first K rows)
* ( V2 )
* where V1 is unit lower triangular.
*
IF( LSAME( SIDE, 'L' ) ) THEN
*
* Form H * C or H**T * C where C = ( C1 )
* ( C2 )
*
LASTV = MAX( K, ILADLR( M, K, V, LDV ) )
LASTC = ILADLC( LASTV, N, C, LDC )
*
* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK)
*
* W := C1**T
*
DO 10 J = 1, K
CALL DCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 )
10 CONTINUE
*
* W := W * V1
*
CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit',
$ LASTC, K, ONE, V, LDV, WORK, LDWORK )
IF( LASTV.GT.K ) THEN
*
* W := W + C2**T *V2
*
CALL DGEMM( 'Transpose', 'No transpose',
$ LASTC, K, LASTV-K,
$ ONE, C( K+1, 1 ), LDC, V( K+1, 1 ), LDV,
$ ONE, WORK, LDWORK )
END IF
*
* W := W * T**T or W * T
*
CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit',
$ LASTC, K, ONE, T, LDT, WORK, LDWORK )
*
* C := C - V * W**T
*
IF( LASTV.GT.K ) THEN
*
* C2 := C2 - V2 * W**T
*
CALL DGEMM( 'No transpose', 'Transpose',
$ LASTV-K, LASTC, K,
$ -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, ONE,
$ C( K+1, 1 ), LDC )
END IF
*
* W := W * V1**T
*
CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit',
$ LASTC, K, ONE, V, LDV, WORK, LDWORK )
*
* C1 := C1 - W**T
*
DO 30 J = 1, K
DO 20 I = 1, LASTC
C( J, I ) = C( J, I ) - WORK( I, J )
20 CONTINUE
30 CONTINUE
*
ELSE IF( LSAME( SIDE, 'R' ) ) THEN
*
* Form C * H or C * H**T where C = ( C1 C2 )
*
LASTV = MAX( K, ILADLR( N, K, V, LDV ) )
LASTC = ILADLR( M, LASTV, C, LDC )
*
* W := C * V = (C1*V1 + C2*V2) (stored in WORK)
*
* W := C1
*
DO 40 J = 1, K
CALL DCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 )
40 CONTINUE
*
* W := W * V1
*
CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit',
$ LASTC, K, ONE, V, LDV, WORK, LDWORK )
IF( LASTV.GT.K ) THEN
*
* W := W + C2 * V2
*
CALL DGEMM( 'No transpose', 'No transpose',
$ LASTC, K, LASTV-K,
$ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV,
$ ONE, WORK, LDWORK )
END IF
*
* W := W * T or W * T**T
*
CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit',
$ LASTC, K, ONE, T, LDT, WORK, LDWORK )
*
* C := C - W * V**T
*
IF( LASTV.GT.K ) THEN
*
* C2 := C2 - W * V2**T
*
CALL DGEMM( 'No transpose', 'Transpose',
$ LASTC, LASTV-K, K,
$ -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, ONE,
$ C( 1, K+1 ), LDC )
END IF
*
* W := W * V1**T
*
CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit',
$ LASTC, K, ONE, V, LDV, WORK, LDWORK )
*
* C1 := C1 - W
*
DO 60 J = 1, K
DO 50 I = 1, LASTC
C( I, J ) = C( I, J ) - WORK( I, J )
50 CONTINUE
60 CONTINUE
END IF
*
ELSE
*
* Let V = ( V1 )
* ( V2 ) (last K rows)
* where V2 is unit upper triangular.
*
IF( LSAME( SIDE, 'L' ) ) THEN
*
* Form H * C or H**T * C where C = ( C1 )
* ( C2 )
*
LASTV = MAX( K, ILADLR( M, K, V, LDV ) )
LASTC = ILADLC( LASTV, N, C, LDC )
*
* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK)
*
* W := C2**T
*
DO 70 J = 1, K
CALL DCOPY( LASTC, C( LASTV-K+J, 1 ), LDC,
$ WORK( 1, J ), 1 )
70 CONTINUE
*
* W := W * V2
*
CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit',
$ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV,
$ WORK, LDWORK )
IF( LASTV.GT.K ) THEN
*
* W := W + C1**T*V1
*
CALL DGEMM( 'Transpose', 'No transpose',
$ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV,
$ ONE, WORK, LDWORK )
END IF
*
* W := W * T**T or W * T
*
CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit',
$ LASTC, K, ONE, T, LDT, WORK, LDWORK )
*
* C := C - V * W**T
*
IF( LASTV.GT.K ) THEN
*
* C1 := C1 - V1 * W**T
*
CALL DGEMM( 'No transpose', 'Transpose',
$ LASTV-K, LASTC, K, -ONE, V, LDV, WORK, LDWORK,
$ ONE, C, LDC )
END IF
*
* W := W * V2**T
*
CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit',
$ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV,
$ WORK, LDWORK )
*
* C2 := C2 - W**T
*
DO 90 J = 1, K
DO 80 I = 1, LASTC
C( LASTV-K+J, I ) = C( LASTV-K+J, I ) - WORK(I, J)
80 CONTINUE
90 CONTINUE
*
ELSE IF( LSAME( SIDE, 'R' ) ) THEN
*
* Form C * H or C * H**T where C = ( C1 C2 )
*
LASTV = MAX( K, ILADLR( N, K, V, LDV ) )
LASTC = ILADLR( M, LASTV, C, LDC )
*
* W := C * V = (C1*V1 + C2*V2) (stored in WORK)
*
* W := C2
*
DO 100 J = 1, K
CALL DCOPY( LASTC, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
100 CONTINUE
*
* W := W * V2
*
CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit',
$ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV,
$ WORK, LDWORK )
IF( LASTV.GT.K ) THEN
*
* W := W + C1 * V1
*
CALL DGEMM( 'No transpose', 'No transpose',
$ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV,
$ ONE, WORK, LDWORK )
END IF
*
* W := W * T or W * T**T
*
CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit',
$ LASTC, K, ONE, T, LDT, WORK, LDWORK )
*
* C := C - W * V**T
*
IF( LASTV.GT.K ) THEN
*
* C1 := C1 - W * V1**T
*
CALL DGEMM( 'No transpose', 'Transpose',
$ LASTC, LASTV-K, K, -ONE, WORK, LDWORK, V, LDV,
$ ONE, C, LDC )
END IF
*
* W := W * V2**T
*
CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit',
$ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV,
$ WORK, LDWORK )
*
* C2 := C2 - W
*
DO 120 J = 1, K
DO 110 I = 1, LASTC
C( I, LASTV-K+J ) = C( I, LASTV-K+J ) - WORK(I, J)
110 CONTINUE
120 CONTINUE
END IF
END IF
*
ELSE IF( LSAME( STOREV, 'R' ) ) THEN
*
IF( LSAME( DIRECT, 'F' ) ) THEN
*
* Let V = ( V1 V2 ) (V1: first K columns)
* where V1 is unit upper triangular.
*
IF( LSAME( SIDE, 'L' ) ) THEN
*
* Form H * C or H**T * C where C = ( C1 )
* ( C2 )
*
LASTV = MAX( K, ILADLC( K, M, V, LDV ) )
LASTC = ILADLC( LASTV, N, C, LDC )
*
* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK)
*
* W := C1**T
*
DO 130 J = 1, K
CALL DCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 )
130 CONTINUE
*
* W := W * V1**T
*
CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit',
$ LASTC, K, ONE, V, LDV, WORK, LDWORK )
IF( LASTV.GT.K ) THEN
*
* W := W + C2**T*V2**T
*
CALL DGEMM( 'Transpose', 'Transpose',
$ LASTC, K, LASTV-K,
$ ONE, C( K+1, 1 ), LDC, V( 1, K+1 ), LDV,
$ ONE, WORK, LDWORK )
END IF
*
* W := W * T**T or W * T
*
CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit',
$ LASTC, K, ONE, T, LDT, WORK, LDWORK )
*
* C := C - V**T * W**T
*
IF( LASTV.GT.K ) THEN
*
* C2 := C2 - V2**T * W**T
*
CALL DGEMM( 'Transpose', 'Transpose',
$ LASTV-K, LASTC, K,
$ -ONE, V( 1, K+1 ), LDV, WORK, LDWORK,
$ ONE, C( K+1, 1 ), LDC )
END IF
*
* W := W * V1
*
CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit',
$ LASTC, K, ONE, V, LDV, WORK, LDWORK )
*
* C1 := C1 - W**T
*
DO 150 J = 1, K
DO 140 I = 1, LASTC
C( J, I ) = C( J, I ) - WORK( I, J )
140 CONTINUE
150 CONTINUE
*
ELSE IF( LSAME( SIDE, 'R' ) ) THEN
*
* Form C * H or C * H**T where C = ( C1 C2 )
*
LASTV = MAX( K, ILADLC( K, N, V, LDV ) )
LASTC = ILADLR( M, LASTV, C, LDC )
*
* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK)
*
* W := C1
*
DO 160 J = 1, K
CALL DCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 )
160 CONTINUE
*
* W := W * V1**T
*
CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit',
$ LASTC, K, ONE, V, LDV, WORK, LDWORK )
IF( LASTV.GT.K ) THEN
*
* W := W + C2 * V2**T
*
CALL DGEMM( 'No transpose', 'Transpose',
$ LASTC, K, LASTV-K,
$ ONE, C( 1, K+1 ), LDC, V( 1, K+1 ), LDV,
$ ONE, WORK, LDWORK )
END IF
*
* W := W * T or W * T**T
*
CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit',
$ LASTC, K, ONE, T, LDT, WORK, LDWORK )
*
* C := C - W * V
*
IF( LASTV.GT.K ) THEN
*
* C2 := C2 - W * V2
*
CALL DGEMM( 'No transpose', 'No transpose',
$ LASTC, LASTV-K, K,
$ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV,
$ ONE, C( 1, K+1 ), LDC )
END IF
*
* W := W * V1
*
CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit',
$ LASTC, K, ONE, V, LDV, WORK, LDWORK )
*
* C1 := C1 - W
*
DO 180 J = 1, K
DO 170 I = 1, LASTC
C( I, J ) = C( I, J ) - WORK( I, J )
170 CONTINUE
180 CONTINUE
*
END IF
*
ELSE
*
* Let V = ( V1 V2 ) (V2: last K columns)
* where V2 is unit lower triangular.
*
IF( LSAME( SIDE, 'L' ) ) THEN
*
* Form H * C or H**T * C where C = ( C1 )
* ( C2 )
*
LASTV = MAX( K, ILADLC( K, M, V, LDV ) )
LASTC = ILADLC( LASTV, N, C, LDC )
*
* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK)
*
* W := C2**T
*
DO 190 J = 1, K
CALL DCOPY( LASTC, C( LASTV-K+J, 1 ), LDC,
$ WORK( 1, J ), 1 )
190 CONTINUE
*
* W := W * V2**T
*
CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit',
$ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV,
$ WORK, LDWORK )
IF( LASTV.GT.K ) THEN
*
* W := W + C1**T * V1**T
*
CALL DGEMM( 'Transpose', 'Transpose',
$ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV,
$ ONE, WORK, LDWORK )
END IF
*
* W := W * T**T or W * T
*
CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit',
$ LASTC, K, ONE, T, LDT, WORK, LDWORK )
*
* C := C - V**T * W**T
*
IF( LASTV.GT.K ) THEN
*
* C1 := C1 - V1**T * W**T
*
CALL DGEMM( 'Transpose', 'Transpose',
$ LASTV-K, LASTC, K, -ONE, V, LDV, WORK, LDWORK,
$ ONE, C, LDC )
END IF
*
* W := W * V2
*
CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit',
$ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV,
$ WORK, LDWORK )
*
* C2 := C2 - W**T
*
DO 210 J = 1, K
DO 200 I = 1, LASTC
C( LASTV-K+J, I ) = C( LASTV-K+J, I ) - WORK(I, J)
200 CONTINUE
210 CONTINUE
*
ELSE IF( LSAME( SIDE, 'R' ) ) THEN
*
* Form C * H or C * H**T where C = ( C1 C2 )
*
LASTV = MAX( K, ILADLC( K, N, V, LDV ) )
LASTC = ILADLR( M, LASTV, C, LDC )
*
* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK)
*
* W := C2
*
DO 220 J = 1, K
CALL DCOPY( LASTC, C( 1, LASTV-K+J ), 1,
$ WORK( 1, J ), 1 )
220 CONTINUE
*
* W := W * V2**T
*
CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit',
$ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV,
$ WORK, LDWORK )
IF( LASTV.GT.K ) THEN
*
* W := W + C1 * V1**T
*
CALL DGEMM( 'No transpose', 'Transpose',
$ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV,
$ ONE, WORK, LDWORK )
END IF
*
* W := W * T or W * T**T
*
CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit',
$ LASTC, K, ONE, T, LDT, WORK, LDWORK )
*
* C := C - W * V
*
IF( LASTV.GT.K ) THEN
*
* C1 := C1 - W * V1
*
CALL DGEMM( 'No transpose', 'No transpose',
$ LASTC, LASTV-K, K, -ONE, WORK, LDWORK, V, LDV,
$ ONE, C, LDC )
END IF
*
* W := W * V2
*
CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit',
$ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV,
$ WORK, LDWORK )
*
* C1 := C1 - W
*
DO 240 J = 1, K
DO 230 I = 1, LASTC
C( I, LASTV-K+J ) = C( I, LASTV-K+J ) - WORK(I, J)
230 CONTINUE
240 CONTINUE
*
END IF
*
END IF
END IF
*
RETURN
*
* End of DLARFB
*
END
SUBROUTINE DLARFG( N, ALPHA, X, INCX, TAU )
*
* -- LAPACK auxiliary routine (version 3.3.1) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* -- April 2011 --
*
* .. Scalar Arguments ..
INTEGER INCX, N
DOUBLE PRECISION ALPHA, TAU
* ..
* .. Array Arguments ..
DOUBLE PRECISION X( * )
* ..
*
* Purpose
* =======
*
* DLARFG generates a real elementary reflector H of order n, such
* that
*
* H * ( alpha ) = ( beta ), H**T * H = I.
* ( x ) ( 0 )
*
* where alpha and beta are scalars, and x is an (n-1)-element real
* vector. H is represented in the form
*
* H = I - tau * ( 1 ) * ( 1 v**T ) ,
* ( v )
*
* where tau is a real scalar and v is a real (n-1)-element
* vector.
*
* If the elements of x are all zero, then tau = 0 and H is taken to be
* the unit matrix.
*
* Otherwise 1 <= tau <= 2.
*
* Arguments
* =========
*
* N (input) INTEGER
* The order of the elementary reflector.
*
* ALPHA (input/output) DOUBLE PRECISION
* On entry, the value alpha.
* On exit, it is overwritten with the value beta.
*
* X (input/output) DOUBLE PRECISION array, dimension
* (1+(N-2)*abs(INCX))
* On entry, the vector x.
* On exit, it is overwritten with the vector v.
*
* INCX (input) INTEGER
* The increment between elements of X. INCX > 0.
*
* TAU (output) DOUBLE PRECISION
* The value tau.
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
* ..
* .. Local Scalars ..
INTEGER J, KNT
DOUBLE PRECISION BETA, RSAFMN, SAFMIN, XNORM
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH, DLAPY2, DNRM2
EXTERNAL DLAMCH, DLAPY2, DNRM2
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, SIGN
* ..
* .. External Subroutines ..
EXTERNAL DSCAL
* ..
* .. Executable Statements ..
*
IF( N.LE.1 ) THEN
TAU = ZERO
RETURN
END IF
*
XNORM = DNRM2( N-1, X, INCX )
*
IF( XNORM.EQ.ZERO ) THEN
*
* H = I
*
TAU = ZERO
ELSE
*
* general case
*
BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA )
SAFMIN = DLAMCH( 'S' ) / DLAMCH( 'E' )
KNT = 0
IF( ABS( BETA ).LT.SAFMIN ) THEN
*
* XNORM, BETA may be inaccurate; scale X and recompute them
*
RSAFMN = ONE / SAFMIN
10 CONTINUE
KNT = KNT + 1
CALL DSCAL( N-1, RSAFMN, X, INCX )
BETA = BETA*RSAFMN
ALPHA = ALPHA*RSAFMN
IF( ABS( BETA ).LT.SAFMIN )
$ GO TO 10
*
* New BETA is at most 1, at least SAFMIN
*
XNORM = DNRM2( N-1, X, INCX )
BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA )
END IF
TAU = ( BETA-ALPHA ) / BETA
CALL DSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX )
*
* If ALPHA is subnormal, it may lose relative accuracy
*
DO 20 J = 1, KNT
BETA = BETA*SAFMIN
20 CONTINUE
ALPHA = BETA
END IF
*
RETURN
*
* End of DLARFG
*
END
SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
IMPLICIT NONE
*
* -- LAPACK auxiliary routine (version 3.3.1) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* -- April 2011 --
*
* .. Scalar Arguments ..
CHARACTER DIRECT, STOREV
INTEGER K, LDT, LDV, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * )
* ..
*
* Purpose
* =======
*
* DLARFT forms the triangular factor T of a real block reflector H
* of order n, which is defined as a product of k elementary reflectors.
*
* If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
*
* If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
*
* If STOREV = 'C', the vector which defines the elementary reflector
* H(i) is stored in the i-th column of the array V, and
*
* H = I - V * T * V**T
*
* If STOREV = 'R', the vector which defines the elementary reflector
* H(i) is stored in the i-th row of the array V, and
*
* H = I - V**T * T * V
*
* Arguments
* =========
*
* DIRECT (input) CHARACTER*1
* Specifies the order in which the elementary reflectors are
* multiplied to form the block reflector:
* = 'F': H = H(1) H(2) . . . H(k) (Forward)
* = 'B': H = H(k) . . . H(2) H(1) (Backward)
*
* STOREV (input) CHARACTER*1
* Specifies how the vectors which define the elementary
* reflectors are stored (see also Further Details):
* = 'C': columnwise
* = 'R': rowwise
*
* N (input) INTEGER
* The order of the block reflector H. N >= 0.
*
* K (input) INTEGER
* The order of the triangular factor T (= the number of
* elementary reflectors). K >= 1.
*
* V (input/output) DOUBLE PRECISION array, dimension
* (LDV,K) if STOREV = 'C'
* (LDV,N) if STOREV = 'R'
* The matrix V. See further details.
*
* LDV (input) INTEGER
* The leading dimension of the array V.
* If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
*
* TAU (input) DOUBLE PRECISION array, dimension (K)
* TAU(i) must contain the scalar factor of the elementary
* reflector H(i).
*
* T (output) DOUBLE PRECISION array, dimension (LDT,K)
* The k by k triangular factor T of the block reflector.
* If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
* lower triangular. The rest of the array is not used.
*
* LDT (input) INTEGER
* The leading dimension of the array T. LDT >= K.
*
* Further Details
* ===============
*
* The shape of the matrix V and the storage of the vectors which define
* the H(i) is best illustrated by the following example with n = 5 and
* k = 3. The elements equal to 1 are not stored; the corresponding
* array elements are modified but restored on exit. The rest of the
* array is not used.
*
* DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
*
* V = ( 1 ) V = ( 1 v1 v1 v1 v1 )
* ( v1 1 ) ( 1 v2 v2 v2 )
* ( v1 v2 1 ) ( 1 v3 v3 )
* ( v1 v2 v3 )
* ( v1 v2 v3 )
*
* DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
*
* V = ( v1 v2 v3 ) V = ( v1 v1 1 )
* ( v1 v2 v3 ) ( v2 v2 v2 1 )
* ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
* ( 1 v3 )
* ( 1 )
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ONE, ZERO
PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
* ..
* .. Local Scalars ..
INTEGER I, J, PREVLASTV, LASTV
DOUBLE PRECISION VII
* ..
* .. External Subroutines ..
EXTERNAL DGEMV, DTRMV
* ..
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. Executable Statements ..
*
* Quick return if possible
*
IF( N.EQ.0 )
$ RETURN
*
IF( LSAME( DIRECT, 'F' ) ) THEN
PREVLASTV = N
DO 20 I = 1, K
PREVLASTV = MAX( I, PREVLASTV )
IF( TAU( I ).EQ.ZERO ) THEN
*
* H(i) = I
*
DO 10 J = 1, I
T( J, I ) = ZERO
10 CONTINUE
ELSE
*
* general case
*
VII = V( I, I )
V( I, I ) = ONE
IF( LSAME( STOREV, 'C' ) ) THEN
! Skip any trailing zeros.
DO LASTV = N, I+1, -1
IF( V( LASTV, I ).NE.ZERO ) EXIT
END DO
J = MIN( LASTV, PREVLASTV )
*
* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i)
*
CALL DGEMV( 'Transpose', J-I+1, I-1, -TAU( I ),
$ V( I, 1 ), LDV, V( I, I ), 1, ZERO,
$ T( 1, I ), 1 )
ELSE
! Skip any trailing zeros.
DO LASTV = N, I+1, -1
IF( V( I, LASTV ).NE.ZERO ) EXIT
END DO
J = MIN( LASTV, PREVLASTV )
*
* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T
*
CALL DGEMV( 'No transpose', I-1, J-I+1, -TAU( I ),
$ V( 1, I ), LDV, V( I, I ), LDV, ZERO,
$ T( 1, I ), 1 )
END IF
V( I, I ) = VII
*
* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
*
CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T,
$ LDT, T( 1, I ), 1 )
T( I, I ) = TAU( I )
IF( I.GT.1 ) THEN
PREVLASTV = MAX( PREVLASTV, LASTV )
ELSE
PREVLASTV = LASTV
END IF
END IF
20 CONTINUE
ELSE
PREVLASTV = 1
DO 40 I = K, 1, -1
IF( TAU( I ).EQ.ZERO ) THEN
*
* H(i) = I
*
DO 30 J = I, K
T( J, I ) = ZERO
30 CONTINUE
ELSE
*
* general case
*
IF( I.LT.K ) THEN
IF( LSAME( STOREV, 'C' ) ) THEN
VII = V( N-K+I, I )
V( N-K+I, I ) = ONE
! Skip any leading zeros.
DO LASTV = 1, I-1
IF( V( LASTV, I ).NE.ZERO ) EXIT
END DO
J = MAX( LASTV, PREVLASTV )
*
* T(i+1:k,i) :=
* - tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i)
*
CALL DGEMV( 'Transpose', N-K+I-J+1, K-I, -TAU( I ),
$ V( J, I+1 ), LDV, V( J, I ), 1, ZERO,
$ T( I+1, I ), 1 )
V( N-K+I, I ) = VII
ELSE
VII = V( I, N-K+I )
V( I, N-K+I ) = ONE
! Skip any leading zeros.
DO LASTV = 1, I-1
IF( V( I, LASTV ).NE.ZERO ) EXIT
END DO
J = MAX( LASTV, PREVLASTV )
*
* T(i+1:k,i) :=
* - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T
*
CALL DGEMV( 'No transpose', K-I, N-K+I-J+1,
$ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV,
$ ZERO, T( I+1, I ), 1 )
V( I, N-K+I ) = VII
END IF
*
* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
*
CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', K-I,
$ T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
IF( I.GT.1 ) THEN
PREVLASTV = MIN( PREVLASTV, LASTV )
ELSE
PREVLASTV = LASTV
END IF
END IF
T( I, I ) = TAU( I )
END IF
40 CONTINUE
END IF
RETURN
*
* End of DLARFT
*
END
SUBROUTINE DLARTG( F, G, CS, SN, R )
*
* -- LAPACK auxiliary routine (version 3.2) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2006
*
* .. Scalar Arguments ..
DOUBLE PRECISION CS, F, G, R, SN
* ..
*
* Purpose
* =======
*
* DLARTG generate a plane rotation so that
*
* [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1.
* [ -SN CS ] [ G ] [ 0 ]
*
* This is a slower, more accurate version of the BLAS1 routine DROTG,
* with the following other differences:
* F and G are unchanged on return.
* If G=0, then CS=1 and SN=0.
* If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any
* floating point operations (saves work in DBDSQR when
* there are zeros on the diagonal).
*
* If F exceeds G in magnitude, CS will be positive.
*
* Arguments
* =========
*
* F (input) DOUBLE PRECISION
* The first component of vector to be rotated.
*
* G (input) DOUBLE PRECISION
* The second component of vector to be rotated.
*
* CS (output) DOUBLE PRECISION
* The cosine of the rotation.
*
* SN (output) DOUBLE PRECISION
* The sine of the rotation.
*
* R (output) DOUBLE PRECISION
* The nonzero component of the rotated vector.
*
* This version has a few statements commented out for thread safety
* (machine parameters are computed on each entry). 10 feb 03, SJH.
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO
PARAMETER ( ZERO = 0.0D0 )
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D0 )
DOUBLE PRECISION TWO
PARAMETER ( TWO = 2.0D0 )
* ..
* .. Local Scalars ..
* LOGICAL FIRST
INTEGER COUNT, I
DOUBLE PRECISION EPS, F1, G1, SAFMIN, SAFMN2, SAFMX2, SCALE
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
EXTERNAL DLAMCH
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, INT, LOG, MAX, SQRT
* ..
* .. Save statement ..
* SAVE FIRST, SAFMX2, SAFMIN, SAFMN2
* ..
* .. Data statements ..
* DATA FIRST / .TRUE. /
* ..
* .. Executable Statements ..
*
* IF( FIRST ) THEN
SAFMIN = DLAMCH( 'S' )
EPS = DLAMCH( 'E' )
SAFMN2 = DLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) /
$ LOG( DLAMCH( 'B' ) ) / TWO )
SAFMX2 = ONE / SAFMN2
* FIRST = .FALSE.
* END IF
IF( G.EQ.ZERO ) THEN
CS = ONE
SN = ZERO
R = F
ELSE IF( F.EQ.ZERO ) THEN
CS = ZERO
SN = ONE
R = G
ELSE
F1 = F
G1 = G
SCALE = MAX( ABS( F1 ), ABS( G1 ) )
IF( SCALE.GE.SAFMX2 ) THEN
COUNT = 0
10 CONTINUE
COUNT = COUNT + 1
F1 = F1*SAFMN2
G1 = G1*SAFMN2
SCALE = MAX( ABS( F1 ), ABS( G1 ) )
IF( SCALE.GE.SAFMX2 )
$ GO TO 10
R = SQRT( F1**2+G1**2 )
CS = F1 / R
SN = G1 / R
DO 20 I = 1, COUNT
R = R*SAFMX2
20 CONTINUE
ELSE IF( SCALE.LE.SAFMN2 ) THEN
COUNT = 0
30 CONTINUE
COUNT = COUNT + 1
F1 = F1*SAFMX2
G1 = G1*SAFMX2
SCALE = MAX( ABS( F1 ), ABS( G1 ) )
IF( SCALE.LE.SAFMN2 )
$ GO TO 30
R = SQRT( F1**2+G1**2 )
CS = F1 / R
SN = G1 / R
DO 40 I = 1, COUNT
R = R*SAFMN2
40 CONTINUE
ELSE
R = SQRT( F1**2+G1**2 )
CS = F1 / R
SN = G1 / R
END IF
IF( ABS( F ).GT.ABS( G ) .AND. CS.LT.ZERO ) THEN
CS = -CS
SN = -SN
R = -R
END IF
END IF
RETURN
*
* End of DLARTG
*
END
SUBROUTINE DLAS2( F, G, H, SSMIN, SSMAX )
*
* -- LAPACK auxiliary routine (version 3.2) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2006
*
* .. Scalar Arguments ..
DOUBLE PRECISION F, G, H, SSMAX, SSMIN
* ..
*
* Purpose
* =======
*
* DLAS2 computes the singular values of the 2-by-2 matrix
* [ F G ]
* [ 0 H ].
* On return, SSMIN is the smaller singular value and SSMAX is the
* larger singular value.
*
* Arguments
* =========
*
* F (input) DOUBLE PRECISION
* The (1,1) element of the 2-by-2 matrix.
*
* G (input) DOUBLE PRECISION
* The (1,2) element of the 2-by-2 matrix.
*
* H (input) DOUBLE PRECISION
* The (2,2) element of the 2-by-2 matrix.
*
* SSMIN (output) DOUBLE PRECISION
* The smaller singular value.
*
* SSMAX (output) DOUBLE PRECISION
* The larger singular value.
*
* Further Details
* ===============
*
* Barring over/underflow, all output quantities are correct to within
* a few units in the last place (ulps), even in the absence of a guard
* digit in addition/subtraction.
*
* In IEEE arithmetic, the code works correctly if one matrix element is
* infinite.
*
* Overflow will not occur unless the largest singular value itself
* overflows, or is within a few ulps of overflow. (On machines with
* partial overflow, like the Cray, overflow may occur if the largest
* singular value is within a factor of 2 of overflow.)
*
* Underflow is harmless if underflow is gradual. Otherwise, results
* may correspond to a matrix modified by perturbations of size near
* the underflow threshold.
*
* ====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO
PARAMETER ( ZERO = 0.0D0 )
DOUBLE PRECISION ONE
PARAMETER ( ONE = 1.0D0 )
DOUBLE PRECISION TWO
PARAMETER ( TWO = 2.0D0 )
* ..
* .. Local Scalars ..
DOUBLE PRECISION AS, AT, AU, C, FA, FHMN, FHMX, GA, HA
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN, SQRT
* ..
* .. Executable Statements ..
*
FA = ABS( F )
GA = ABS( G )
HA = ABS( H )
FHMN = MIN( FA, HA )
FHMX = MAX( FA, HA )
IF( FHMN.EQ.ZERO ) THEN
SSMIN = ZERO
IF( FHMX.EQ.ZERO ) THEN
SSMAX = GA
ELSE
SSMAX = MAX( FHMX, GA )*SQRT( ONE+
$ ( MIN( FHMX, GA ) / MAX( FHMX, GA ) )**2 )
END IF
ELSE
IF( GA.LT.FHMX ) THEN
AS = ONE + FHMN / FHMX
AT = ( FHMX-FHMN ) / FHMX
AU = ( GA / FHMX )**2
C = TWO / ( SQRT( AS*AS+AU )+SQRT( AT*AT+AU ) )
SSMIN = FHMN*C
SSMAX = FHMX / C
ELSE
AU = FHMX / GA
IF( AU.EQ.ZERO ) THEN
*
* Avoid possible harmful underflow if exponent range
* asymmetric (true SSMIN may not underflow even if
* AU underflows)
*
SSMIN = ( FHMN*FHMX ) / GA
SSMAX = GA
ELSE
AS = ONE + FHMN / FHMX
AT = ( FHMX-FHMN ) / FHMX
C = ONE / ( SQRT( ONE+( AS*AU )**2 )+
$ SQRT( ONE+( AT*AU )**2 ) )
SSMIN = ( FHMN*C )*AU
SSMIN = SSMIN + SSMIN
SSMAX = GA / ( C+C )
END IF
END IF
END IF
RETURN
*
* End of DLAS2
*
END
SUBROUTINE DLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO )
*
* -- LAPACK auxiliary routine (version 3.3.0) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2010
*
* .. Scalar Arguments ..
CHARACTER TYPE
INTEGER INFO, KL, KU, LDA, M, N
DOUBLE PRECISION CFROM, CTO
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * )
* ..
*
* Purpose
* =======
*
* DLASCL multiplies the M by N real matrix A by the real scalar
* CTO/CFROM. This is done without over/underflow as long as the final
* result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
* A may be full, upper triangular, lower triangular, upper Hessenberg,
* or banded.
*
* Arguments
* =========
*
* TYPE (input) CHARACTER*1
* TYPE indices the storage type of the input matrix.
* = 'G': A is a full matrix.
* = 'L': A is a lower triangular matrix.
* = 'U': A is an upper triangular matrix.
* = 'H': A is an upper Hessenberg matrix.
* = 'B': A is a symmetric band matrix with lower bandwidth KL
* and upper bandwidth KU and with the only the lower
* half stored.
* = 'Q': A is a symmetric band matrix with lower bandwidth KL
* and upper bandwidth KU and with the only the upper
* half stored.
* = 'Z': A is a band matrix with lower bandwidth KL and upper
* bandwidth KU. See DGBTRF for storage details.
*
* KL (input) INTEGER
* The lower bandwidth of A. Referenced only if TYPE = 'B',
* 'Q' or 'Z'.
*
* KU (input) INTEGER
* The upper bandwidth of A. Referenced only if TYPE = 'B',
* 'Q' or 'Z'.
*
* CFROM (input) DOUBLE PRECISION
* CTO (input) DOUBLE PRECISION
* The matrix A is multiplied by CTO/CFROM. A(I,J) is computed
* without over/underflow if the final result CTO*A(I,J)/CFROM
* can be represented without over/underflow. CFROM must be
* nonzero.
*
* M (input) INTEGER
* The number of rows of the matrix A. M >= 0.
*
* N (input) INTEGER
* The number of columns of the matrix A. N >= 0.
*
* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
* The matrix to be multiplied by CTO/CFROM. See TYPE for the
* storage type.
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,M).
*
* INFO (output) INTEGER
* 0 - successful exit
* <0 - if INFO = -i, the i-th argument had an illegal value.
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
* ..
* .. Local Scalars ..
LOGICAL DONE
INTEGER I, ITYPE, J, K1, K2, K3, K4
DOUBLE PRECISION BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM
* ..
* .. External Functions ..
LOGICAL LSAME, DISNAN
DOUBLE PRECISION DLAMCH
EXTERNAL LSAME, DLAMCH, DISNAN
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN
* ..
* .. External Subroutines ..
EXTERNAL XERBLA
* ..
* .. Executable Statements ..
*
* Test the input arguments
*
INFO = 0
*
IF( LSAME( TYPE, 'G' ) ) THEN
ITYPE = 0
ELSE IF( LSAME( TYPE, 'L' ) ) THEN
ITYPE = 1
ELSE IF( LSAME( TYPE, 'U' ) ) THEN
ITYPE = 2
ELSE IF( LSAME( TYPE, 'H' ) ) THEN
ITYPE = 3
ELSE IF( LSAME( TYPE, 'B' ) ) THEN
ITYPE = 4
ELSE IF( LSAME( TYPE, 'Q' ) ) THEN
ITYPE = 5
ELSE IF( LSAME( TYPE, 'Z' ) ) THEN
ITYPE = 6
ELSE
ITYPE = -1
END IF
*
IF( ITYPE.EQ.-1 ) THEN
INFO = -1
ELSE IF( CFROM.EQ.ZERO .OR. DISNAN(CFROM) ) THEN
INFO = -4
ELSE IF( DISNAN(CTO) ) THEN
INFO = -5
ELSE IF( M.LT.0 ) THEN
INFO = -6
ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR.
$ ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN
INFO = -7
ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN
INFO = -9
ELSE IF( ITYPE.GE.4 ) THEN
IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN
INFO = -2
ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR.
$ ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) )
$ THEN
INFO = -3
ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR.
$ ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR.
$ ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN
INFO = -9
END IF
END IF
*
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'DLASCL', -INFO )
RETURN
END IF
*
* Quick return if possible
*
IF( N.EQ.0 .OR. M.EQ.0 )
$ RETURN
*
* Get machine parameters
*
SMLNUM = DLAMCH( 'S' )
BIGNUM = ONE / SMLNUM
*
CFROMC = CFROM
CTOC = CTO
*
10 CONTINUE
CFROM1 = CFROMC*SMLNUM
IF( CFROM1.EQ.CFROMC ) THEN
! CFROMC is an inf. Multiply by a correctly signed zero for
! finite CTOC, or a NaN if CTOC is infinite.
MUL = CTOC / CFROMC
DONE = .TRUE.
CTO1 = CTOC
ELSE
CTO1 = CTOC / BIGNUM
IF( CTO1.EQ.CTOC ) THEN
! CTOC is either 0 or an inf. In both cases, CTOC itself
! serves as the correct multiplication factor.
MUL = CTOC
DONE = .TRUE.
CFROMC = ONE
ELSE IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN
MUL = SMLNUM
DONE = .FALSE.
CFROMC = CFROM1
ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN
MUL = BIGNUM
DONE = .FALSE.
CTOC = CTO1
ELSE
MUL = CTOC / CFROMC
DONE = .TRUE.
END IF
END IF
*
IF( ITYPE.EQ.0 ) THEN
*
* Full matrix
*
DO 30 J = 1, N
DO 20 I = 1, M
A( I, J ) = A( I, J )*MUL
20 CONTINUE
30 CONTINUE
*
ELSE IF( ITYPE.EQ.1 ) THEN
*
* Lower triangular matrix
*
DO 50 J = 1, N
DO 40 I = J, M
A( I, J ) = A( I, J )*MUL
40 CONTINUE
50 CONTINUE
*
ELSE IF( ITYPE.EQ.2 ) THEN
*
* Upper triangular matrix
*
DO 70 J = 1, N
DO 60 I = 1, MIN( J, M )
A( I, J ) = A( I, J )*MUL
60 CONTINUE
70 CONTINUE
*
ELSE IF( ITYPE.EQ.3 ) THEN
*
* Upper Hessenberg matrix
*
DO 90 J = 1, N
DO 80 I = 1, MIN( J+1, M )
A( I, J ) = A( I, J )*MUL
80 CONTINUE
90 CONTINUE
*
ELSE IF( ITYPE.EQ.4 ) THEN
*
* Lower half of a symmetric band matrix
*
K3 = KL + 1
K4 = N + 1
DO 110 J = 1, N
DO 100 I = 1, MIN( K3, K4-J )
A( I, J ) = A( I, J )*MUL
100 CONTINUE
110 CONTINUE
*
ELSE IF( ITYPE.EQ.5 ) THEN
*
* Upper half of a symmetric band matrix
*
K1 = KU + 2
K3 = KU + 1
DO 130 J = 1, N
DO 120 I = MAX( K1-J, 1 ), K3
A( I, J ) = A( I, J )*MUL
120 CONTINUE
130 CONTINUE
*
ELSE IF( ITYPE.EQ.6 ) THEN
*
* Band matrix
*
K1 = KL + KU + 2
K2 = KL + 1
K3 = 2*KL + KU + 1
K4 = KL + KU + 1 + M
DO 150 J = 1, N
DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J )
A( I, J ) = A( I, J )*MUL
140 CONTINUE
150 CONTINUE
*
END IF
*
IF( .NOT.DONE )
$ GO TO 10
*
RETURN
*
* End of DLASCL
*
END
SUBROUTINE DLASET( UPLO, M, N, ALPHA, BETA, A, LDA )
*
* -- LAPACK auxiliary routine (version 3.2) --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2006
*
* .. Scalar Arguments ..
CHARACTER UPLO
INTEGER LDA, M, N
DOUBLE PRECISION ALPHA, BETA
* ..
* .. Array Arguments ..
DOUBLE PRECISION A( LDA, * )
* ..
*
* Purpose
* =======
*
* DLASET initializes an m-by-n matrix A to BETA on the diagonal and
* ALPHA on the offdiagonals.
*
* Arguments
* =========
*
* UPLO (input) CHARACTER*1
* Specifies the part of the matrix A to be set.
* = 'U': Upper triangular part is set; the strictly lower
* triangular part of A is not changed.
* = 'L': Lower triangular part is set; the strictly upper
* triangular part of A is not changed.
* Otherwise: All of the matrix A is set.
*
* M (input) INTEGER
* The number of rows of the matrix A. M >= 0.
*
* N (input) INTEGER
* The number of columns of the matrix A. N >= 0.
*
* ALPHA (input) DOUBLE PRECISION
* The constant to which the offdiagonal elements are to be set.
*
* BETA (input) DOUBLE PRECISION
* The constant to which the diagonal elements are to be set.
*
* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
* On exit, the leading m-by-n submatrix of A is set as follows:
*
* if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n,
* if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n,
* otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j,
*
* and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n).
*
* LDA (input) INTEGER
* The leading dimension of the array A. LDA >= max(1,M).
*
* =====================================================================
*
* .. Local Scalars ..
INTEGER I, J
* ..
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. Intrinsic Functions ..
INTRINSIC MIN
* ..
* .. Executable Statements ..
*
IF( LSAME( UPLO, 'U' ) ) THEN
*
* Set the strictly upper triangular or trapezoidal part of the
* array to ALPHA.
*
DO 20 J = 2, N
DO 10 I = 1, MIN( J-1, M )
A( I, J ) = ALPHA
10 CONTINUE
20 CONTINUE
*
ELSE IF( LSAME( UPLO, 'L' ) ) THEN
*
* Set the strictly lower triangular or trapezoidal part of the
* array to ALPHA.
*
DO 40 J = 1, MIN( M, N )
DO 30 I = J + 1, M
A( I, J ) = ALPHA
30 CONTINUE
40 CONTINUE
*
ELSE
*
* Set the leading m-by-n submatrix to ALPHA.
*
DO 60 J = 1, N
DO 50 I = 1, M
A( I, J ) = ALPHA
50 CONTINUE
60 CONTINUE
END IF
*
* Set the first min(M,N) diagonal elements to BETA.
*
DO 70 I = 1, MIN( M, N )
A( I, I ) = BETA
70 CONTINUE
*
RETURN
*
* End of DLASET
*
END
SUBROUTINE DLASQ1( N, D, E, WORK, INFO )
*
* -- LAPACK routine (version 3.2) --
*
* -- Contributed by Osni Marques of the Lawrence Berkeley National --
* -- Laboratory and Beresford Parlett of the Univ. of California at --
* -- Berkeley --
* -- November 2008 --
*
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION D( * ), E( * ), WORK( * )
* ..
*
* Purpose
* =======
*
* DLASQ1 computes the singular values of a real N-by-N bidiagonal
* matrix with diagonal D and off-diagonal E. The singular values
* are computed to high relative accuracy, in the absence of
* denormalization, underflow and overflow. The algorithm was first
* presented in
*
* "Accurate singular values and differential qd algorithms" by K. V.
* Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230,
* 1994,
*
* and the present implementation is described in "An implementation of
* the dqds Algorithm (Positive Case)", LAPACK Working Note.
*
* Arguments
* =========
*
* N (input) INTEGER
* The number of rows and columns in the matrix. N >= 0.
*
* D (input/output) DOUBLE PRECISION array, dimension (N)
* On entry, D contains the diagonal elements of the
* bidiagonal matrix whose SVD is desired. On normal exit,
* D contains the singular values in decreasing order.
*
* E (input/output) DOUBLE PRECISION array, dimension (N)
* On entry, elements E(1:N-1) contain the off-diagonal elements
* of the bidiagonal matrix whose SVD is desired.
* On exit, E is overwritten.
*
* WORK (workspace) DOUBLE PRECISION array, dimension (4*N)
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if INFO = -i, the i-th argument had an illegal value
* > 0: the algorithm failed
* = 1, a split was marked by a positive value in E
* = 2, current block of Z not diagonalized after 30*N
* iterations (in inner while loop)
* = 3, termination criterion of outer while loop not met
* (program created more than N unreduced blocks)
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO
PARAMETER ( ZERO = 0.0D0 )
* ..
* .. Local Scalars ..
INTEGER I, IINFO
DOUBLE PRECISION EPS, SCALE, SAFMIN, SIGMN, SIGMX
* ..
* .. External Subroutines ..
EXTERNAL DCOPY, DLAS2, DLASCL, DLASQ2, DLASRT, XERBLA
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
EXTERNAL DLAMCH
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, SQRT
* ..
* .. Executable Statements ..
*
INFO = 0
IF( N.LT.0 ) THEN
INFO = -2
CALL XERBLA( 'DLASQ1', -INFO )
RETURN
ELSE IF( N.EQ.0 ) THEN
RETURN
ELSE IF( N.EQ.1 ) THEN
D( 1 ) = ABS( D( 1 ) )
RETURN
ELSE IF( N.EQ.2 ) THEN
CALL DLAS2( D( 1 ), E( 1 ), D( 2 ), SIGMN, SIGMX )
D( 1 ) = SIGMX
D( 2 ) = SIGMN
RETURN
END IF
*
* Estimate the largest singular value.
*
SIGMX = ZERO
DO 10 I = 1, N - 1
D( I ) = ABS( D( I ) )
SIGMX = MAX( SIGMX, ABS( E( I ) ) )
10 CONTINUE
D( N ) = ABS( D( N ) )
*
* Early return if SIGMX is zero (matrix is already diagonal).
*
IF( SIGMX.EQ.ZERO ) THEN
CALL DLASRT( 'D', N, D, IINFO )
RETURN
END IF
*
DO 20 I = 1, N
SIGMX = MAX( SIGMX, D( I ) )
20 CONTINUE
*
* Copy D and E into WORK (in the Z format) and scale (squaring the
* input data makes scaling by a power of the radix pointless).
*
EPS = DLAMCH( 'Precision' )
SAFMIN = DLAMCH( 'Safe minimum' )
SCALE = SQRT( EPS / SAFMIN )
CALL DCOPY( N, D, 1, WORK( 1 ), 2 )
CALL DCOPY( N-1, E, 1, WORK( 2 ), 2 )
CALL DLASCL( 'G', 0, 0, SIGMX, SCALE, 2*N-1, 1, WORK, 2*N-1,
$ IINFO )
*
* Compute the q's and e's.
*
DO 30 I = 1, 2*N - 1
WORK( I ) = WORK( I )**2
30 CONTINUE
WORK( 2*N ) = ZERO
*
CALL DLASQ2( N, WORK, INFO )
*
IF( INFO.EQ.0 ) THEN
DO 40 I = 1, N
D( I ) = SQRT( WORK( I ) )
40 CONTINUE
CALL DLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, D, N, IINFO )
END IF
*
RETURN
*
* End of DLASQ1
*
END
SUBROUTINE DLASQ2( N, Z, INFO )
*
* -- LAPACK routine (version 3.2) --
*
* -- Contributed by Osni Marques of the Lawrence Berkeley National --
* -- Laboratory and Beresford Parlett of the Univ. of California at --
* -- Berkeley --
* -- November 2008 --
*
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INFO, N
* ..
* .. Array Arguments ..
DOUBLE PRECISION Z( * )
* ..
*
* Purpose
* =======
*
* DLASQ2 computes all the eigenvalues of the symmetric positive
* definite tridiagonal matrix associated with the qd array Z to high
* relative accuracy are computed to high relative accuracy, in the
* absence of denormalization, underflow and overflow.
*
* To see the relation of Z to the tridiagonal matrix, let L be a
* unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and
* let U be an upper bidiagonal matrix with 1's above and diagonal
* Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the
* symmetric tridiagonal to which it is similar.
*
* Note : DLASQ2 defines a logical variable, IEEE, which is true
* on machines which follow ieee-754 floating-point standard in their
* handling of infinities and NaNs, and false otherwise. This variable
* is passed to DLASQ3.
*
* Arguments
* =========
*
* N (input) INTEGER
* The number of rows and columns in the matrix. N >= 0.
*
* Z (input/output) DOUBLE PRECISION array, dimension ( 4*N )
* On entry Z holds the qd array. On exit, entries 1 to N hold
* the eigenvalues in decreasing order, Z( 2*N+1 ) holds the
* trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If
* N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 )
* holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of
* shifts that failed.
*
* INFO (output) INTEGER
* = 0: successful exit
* < 0: if the i-th argument is a scalar and had an illegal
* value, then INFO = -i, if the i-th argument is an
* array and the j-entry had an illegal value, then
* INFO = -(i*100+j)
* > 0: the algorithm failed
* = 1, a split was marked by a positive value in E
* = 2, current block of Z not diagonalized after 30*N
* iterations (in inner while loop)
* = 3, termination criterion of outer while loop not met
* (program created more than N unreduced blocks)
*
* Further Details
* ===============
* Local Variables: I0:N0 defines a current unreduced segment of Z.
* The shifts are accumulated in SIGMA. Iteration count is in ITER.
* Ping-pong is controlled by PP (alternates between 0 and 1).
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION CBIAS
PARAMETER ( CBIAS = 1.50D0 )
DOUBLE PRECISION ZERO, HALF, ONE, TWO, FOUR, HUNDRD
PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0,
$ TWO = 2.0D0, FOUR = 4.0D0, HUNDRD = 100.0D0 )
* ..
* .. Local Scalars ..
LOGICAL IEEE
INTEGER I0, I4, IINFO, IPN4, ITER, IWHILA, IWHILB, K,
$ KMIN, N0, NBIG, NDIV, NFAIL, PP, SPLT, TTYPE
DOUBLE PRECISION D, DEE, DEEMIN, DESIG, DMIN, DMIN1, DMIN2, DN,
$ DN1, DN2, E, EMAX, EMIN, EPS, G, OLDEMN, QMAX,
$ QMIN, S, SAFMIN, SIGMA, T, TAU, TEMP, TOL,
$ TOL2, TRACE, ZMAX
* ..
* .. External Subroutines ..
EXTERNAL DLASQ3, DLASRT, XERBLA
* ..
* .. External Functions ..
INTEGER ILAENV
DOUBLE PRECISION DLAMCH
EXTERNAL DLAMCH, ILAENV
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, DBLE, MAX, MIN, SQRT
* ..
* .. Executable Statements ..
*
* Test the input arguments.
* (in case DLASQ2 is not called by DLASQ1)
*
INFO = 0
EPS = DLAMCH( 'Precision' )
SAFMIN = DLAMCH( 'Safe minimum' )
TOL = EPS*HUNDRD
TOL2 = TOL**2
*
IF( N.LT.0 ) THEN
INFO = -1
CALL XERBLA( 'DLASQ2', 1 )
RETURN
ELSE IF( N.EQ.0 ) THEN
RETURN
ELSE IF( N.EQ.1 ) THEN
*
* 1-by-1 case.
*
IF( Z( 1 ).LT.ZERO ) THEN
INFO = -201
CALL XERBLA( 'DLASQ2', 2 )
END IF
RETURN
ELSE IF( N.EQ.2 ) THEN
*
* 2-by-2 case.
*
IF( Z( 2 ).LT.ZERO .OR. Z( 3 ).LT.ZERO ) THEN
INFO = -2
CALL XERBLA( 'DLASQ2', 2 )
RETURN
ELSE IF( Z( 3 ).GT.Z( 1 ) ) THEN
D = Z( 3 )
Z( 3 ) = Z( 1 )
Z( 1 ) = D
END IF
Z( 5 ) = Z( 1 ) + Z( 2 ) + Z( 3 )
IF( Z( 2 ).GT.Z( 3 )*TOL2 ) THEN
T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) )
S = Z( 3 )*( Z( 2 ) / T )
IF( S.LE.T ) THEN
S = Z( 3 )*( Z( 2 ) / ( T*( ONE+SQRT( ONE+S / T ) ) ) )
ELSE
S = Z( 3 )*( Z( 2 ) / ( T+SQRT( T )*SQRT( T+S ) ) )
END IF
T = Z( 1 ) + ( S+Z( 2 ) )
Z( 3 ) = Z( 3 )*( Z( 1 ) / T )
Z( 1 ) = T
END IF
Z( 2 ) = Z( 3 )
Z( 6 ) = Z( 2 ) + Z( 1 )
RETURN
END IF
*
* Check for negative data and compute sums of q's and e's.
*
Z( 2*N ) = ZERO
EMIN = Z( 2 )
QMAX = ZERO
ZMAX = ZERO
D = ZERO
E = ZERO
*
DO 10 K = 1, 2*( N-1 ), 2
IF( Z( K ).LT.ZERO ) THEN
INFO = -( 200+K )
CALL XERBLA( 'DLASQ2', 2 )
RETURN
ELSE IF( Z( K+1 ).LT.ZERO ) THEN
INFO = -( 200+K+1 )
CALL XERBLA( 'DLASQ2', 2 )
RETURN
END IF
D = D + Z( K )
E = E + Z( K+1 )
QMAX = MAX( QMAX, Z( K ) )
EMIN = MIN( EMIN, Z( K+1 ) )
ZMAX = MAX( QMAX, ZMAX, Z( K+1 ) )
10 CONTINUE
IF( Z( 2*N-1 ).LT.ZERO ) THEN
INFO = -( 200+2*N-1 )
CALL XERBLA( 'DLASQ2', 2 )
RETURN
END IF
D = D + Z( 2*N-1 )
QMAX = MAX( QMAX, Z( 2*N-1 ) )
ZMAX = MAX( QMAX, ZMAX )
*
* Check for diagonality.
*
IF( E.EQ.ZERO ) THEN
DO 20 K = 2, N
Z( K ) = Z( 2*K-1 )
20 CONTINUE
CALL DLASRT( 'D', N, Z, IINFO )
Z( 2*N-1 ) = D
RETURN
END IF
*
TRACE = D + E
*
* Check for zero data.
*
IF( TRACE.EQ.ZERO ) THEN
Z( 2*N-1 ) = ZERO
RETURN
END IF
*
* Check whether the machine is IEEE conformable.
*
IEEE = ILAENV( 10, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND.
$ ILAENV( 11, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1
*
* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...).
*
DO 30 K = 2*N, 2, -2
Z( 2*K ) = ZERO
Z( 2*K-1 ) = Z( K )
Z( 2*K-2 ) = ZERO
Z( 2*K-3 ) = Z( K-1 )
30 CONTINUE
*
I0 = 1
N0 = N
*
* Reverse the qd-array, if warranted.
*
IF( CBIAS*Z( 4*I0-3 ).LT.Z( 4*N0-3 ) ) THEN
IPN4 = 4*( I0+N0 )
DO 40 I4 = 4*I0, 2*( I0+N0-1 ), 4
TEMP = Z( I4-3 )
Z( I4-3 ) = Z( IPN4-I4-3 )
Z( IPN4-I4-3 ) = TEMP
TEMP = Z( I4-1 )
Z( I4-1 ) = Z( IPN4-I4-5 )
Z( IPN4-I4-5 ) = TEMP
40 CONTINUE
END IF
*
* Initial split checking via dqd and Li's test.
*
PP = 0
*
DO 80 K = 1, 2
*
D = Z( 4*N0+PP-3 )
DO 50 I4 = 4*( N0-1 ) + PP, 4*I0 + PP, -4
IF( Z( I4-1 ).LE.TOL2*D ) THEN
Z( I4-1 ) = -ZERO
D = Z( I4-3 )
ELSE
D = Z( I4-3 )*( D / ( D+Z( I4-1 ) ) )
END IF
50 CONTINUE
*
* dqd maps Z to ZZ plus Li's test.
*
EMIN = Z( 4*I0+PP+1 )
D = Z( 4*I0+PP-3 )
DO 60 I4 = 4*I0 + PP, 4*( N0-1 ) + PP, 4
Z( I4-2*PP-2 ) = D + Z( I4-1 )
IF( Z( I4-1 ).LE.TOL2*D ) THEN
Z( I4-1 ) = -ZERO
Z( I4-2*PP-2 ) = D
Z( I4-2*PP ) = ZERO
D = Z( I4+1 )
ELSE IF( SAFMIN*Z( I4+1 ).LT.Z( I4-2*PP-2 ) .AND.
$ SAFMIN*Z( I4-2*PP-2 ).LT.Z( I4+1 ) ) THEN
TEMP = Z( I4+1 ) / Z( I4-2*PP-2 )
Z( I4-2*PP ) = Z( I4-1 )*TEMP
D = D*TEMP
ELSE
Z( I4-2*PP ) = Z( I4+1 )*( Z( I4-1 ) / Z( I4-2*PP-2 ) )
D = Z( I4+1 )*( D / Z( I4-2*PP-2 ) )
END IF
EMIN = MIN( EMIN, Z( I4-2*PP ) )
60 CONTINUE
Z( 4*N0-PP-2 ) = D
*
* Now find qmax.
*
QMAX = Z( 4*I0-PP-2 )
DO 70 I4 = 4*I0 - PP + 2, 4*N0 - PP - 2, 4
QMAX = MAX( QMAX, Z( I4 ) )
70 CONTINUE
*
* Prepare for the next iteration on K.
*
PP = 1 - PP
80 CONTINUE
*
* Initialise variables to pass to DLASQ3.
*
TTYPE = 0
DMIN1 = ZERO
DMIN2 = ZERO
DN = ZERO
DN1 = ZERO
DN2 = ZERO
G = ZERO
TAU = ZERO
*
ITER = 2
NFAIL = 0
NDIV = 2*( N0-I0 )
*
DO 160 IWHILA = 1, N + 1
IF( N0.LT.1 )
$ GO TO 170
*
* While array unfinished do
*
* E(N0) holds the value of SIGMA when submatrix in I0:N0
* splits from the rest of the array, but is negated.
*
DESIG = ZERO
IF( N0.EQ.N ) THEN
SIGMA = ZERO
ELSE
SIGMA = -Z( 4*N0-1 )
END IF
IF( SIGMA.LT.ZERO ) THEN
INFO = 1
RETURN
END IF
*
* Find last unreduced submatrix's top index I0, find QMAX and
* EMIN. Find Gershgorin-type bound if Q's much greater than E's.
*
EMAX = ZERO
IF( N0.GT.I0 ) THEN
EMIN = ABS( Z( 4*N0-5 ) )
ELSE
EMIN = ZERO
END IF
QMIN = Z( 4*N0-3 )
QMAX = QMIN
DO 90 I4 = 4*N0, 8, -4
IF( Z( I4-5 ).LE.ZERO )
$ GO TO 100
IF( QMIN.GE.FOUR*EMAX ) THEN
QMIN = MIN( QMIN, Z( I4-3 ) )
EMAX = MAX( EMAX, Z( I4-5 ) )
END IF
QMAX = MAX( QMAX, Z( I4-7 )+Z( I4-5 ) )
EMIN = MIN( EMIN, Z( I4-5 ) )
90 CONTINUE
I4 = 4
*
100 CONTINUE
I0 = I4 / 4
PP = 0
*
IF( N0-I0.GT.1 ) THEN
DEE = Z( 4*I0-3 )
DEEMIN = DEE
KMIN = I0
DO 110 I4 = 4*I0+1, 4*N0-3, 4
DEE = Z( I4 )*( DEE /( DEE+Z( I4-2 ) ) )
IF( DEE.LE.DEEMIN ) THEN
DEEMIN = DEE
KMIN = ( I4+3 )/4
END IF
110 CONTINUE
IF( (KMIN-I0)*2.LT.N0-KMIN .AND.
$ DEEMIN.LE.HALF*Z(4*N0-3) ) THEN
IPN4 = 4*( I0+N0 )
PP = 2
DO 120 I4 = 4*I0, 2*( I0+N0-1 ), 4
TEMP = Z( I4-3 )
Z( I4-3 ) = Z( IPN4-I4-3 )
Z( IPN4-I4-3 ) = TEMP
TEMP = Z( I4-2 )
Z( I4-2 ) = Z( IPN4-I4-2 )
Z( IPN4-I4-2 ) = TEMP
TEMP = Z( I4-1 )
Z( I4-1 ) = Z( IPN4-I4-5 )
Z( IPN4-I4-5 ) = TEMP
TEMP = Z( I4 )
Z( I4 ) = Z( IPN4-I4-4 )
Z( IPN4-I4-4 ) = TEMP
120 CONTINUE
END IF
END IF
*
* Put -(initial shift) into DMIN.
*
DMIN = -MAX( ZERO, QMIN-TWO*SQRT( QMIN )*SQRT( EMAX ) )
*
* Now I0:N0 is unreduced.
* PP = 0 for ping, PP = 1 for pong.
* PP = 2 indicates that flipping was applied to the Z array and
* and that the tests for deflation upon entry in DLASQ3
* should not be performed.
*
NBIG = 30*( N0-I0+1 )
DO 140 IWHILB = 1, NBIG
IF( I0.GT.N0 )
$ GO TO 150
*
* While submatrix unfinished take a good dqds step.
*
CALL DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL,
$ ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1,
$ DN2, G, TAU )
*
PP = 1 - PP
*
* When EMIN is very small check for splits.
*
IF( PP.EQ.0 .AND. N0-I0.GE.3 ) THEN
IF( Z( 4*N0 ).LE.TOL2*QMAX .OR.
$ Z( 4*N0-1 ).LE.TOL2*SIGMA ) THEN
SPLT = I0 - 1
QMAX = Z( 4*I0-3 )
EMIN = Z( 4*I0-1 )
OLDEMN = Z( 4*I0 )
DO 130 I4 = 4*I0, 4*( N0-3 ), 4
IF( Z( I4 ).LE.TOL2*Z( I4-3 ) .OR.
$ Z( I4-1 ).LE.TOL2*SIGMA ) THEN
Z( I4-1 ) = -SIGMA
SPLT = I4 / 4
QMAX = ZERO
EMIN = Z( I4+3 )
OLDEMN = Z( I4+4 )
ELSE
QMAX = MAX( QMAX, Z( I4+1 ) )
EMIN = MIN( EMIN, Z( I4-1 ) )
OLDEMN = MIN( OLDEMN, Z( I4 ) )
END IF
130 CONTINUE
Z( 4*N0-1 ) = EMIN
Z( 4*N0 ) = OLDEMN
I0 = SPLT + 1
END IF
END IF
*
140 CONTINUE
*
INFO = 2
RETURN
*
* end IWHILB
*
150 CONTINUE
*
160 CONTINUE
*
INFO = 3
RETURN
*
* end IWHILA
*
170 CONTINUE
*
* Move q's to the front.
*
DO 180 K = 2, N
Z( K ) = Z( 4*K-3 )
180 CONTINUE
*
* Sort and compute sum of eigenvalues.
*
CALL DLASRT( 'D', N, Z, IINFO )
*
E = ZERO
DO 190 K = N, 1, -1
E = E + Z( K )
190 CONTINUE
*
* Store trace, sum(eigenvalues) and information on performance.
*
Z( 2*N+1 ) = TRACE
Z( 2*N+2 ) = E
Z( 2*N+3 ) = DBLE( ITER )
Z( 2*N+4 ) = DBLE( NDIV ) / DBLE( N**2 )
Z( 2*N+5 ) = HUNDRD*NFAIL / DBLE( ITER )
RETURN
*
* End of DLASQ2
*
END
SUBROUTINE DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL,
$ ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1,
$ DN2, G, TAU )
*
* -- LAPACK routine (version 3.2.2) --
*
* -- Contributed by Osni Marques of the Lawrence Berkeley National --
* -- Laboratory and Beresford Parlett of the Univ. of California at --
* -- Berkeley --
* -- June 2010 --
*
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
LOGICAL IEEE
INTEGER I0, ITER, N0, NDIV, NFAIL, PP
DOUBLE PRECISION DESIG, DMIN, DMIN1, DMIN2, DN, DN1, DN2, G,
$ QMAX, SIGMA, TAU
* ..
* .. Array Arguments ..
DOUBLE PRECISION Z( * )
* ..
*
* Purpose
* =======
*
* DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds.
* In case of failure it changes shifts, and tries again until output
* is positive.
*
* Arguments
* =========
*
* I0 (input) INTEGER
* First index.
*
* N0 (input/output) INTEGER
* Last index.
*
* Z (input) DOUBLE PRECISION array, dimension ( 4*N )
* Z holds the qd array.
*
* PP (input/output) INTEGER
* PP=0 for ping, PP=1 for pong.
* PP=2 indicates that flipping was applied to the Z array
* and that the initial tests for deflation should not be
* performed.
*
* DMIN (output) DOUBLE PRECISION
* Minimum value of d.
*
* SIGMA (output) DOUBLE PRECISION
* Sum of shifts used in current segment.
*
* DESIG (input/output) DOUBLE PRECISION
* Lower order part of SIGMA
*
* QMAX (input) DOUBLE PRECISION
* Maximum value of q.
*
* NFAIL (output) INTEGER
* Number of times shift was too big.
*
* ITER (output) INTEGER
* Number of iterations.
*
* NDIV (output) INTEGER
* Number of divisions.
*
* IEEE (input) LOGICAL
* Flag for IEEE or non IEEE arithmetic (passed to DLASQ5).
*
* TTYPE (input/output) INTEGER
* Shift type.
*
* DMIN1 (input/output) DOUBLE PRECISION
*
* DMIN2 (input/output) DOUBLE PRECISION
*
* DN (input/output) DOUBLE PRECISION
*
* DN1 (input/output) DOUBLE PRECISION
*
* DN2 (input/output) DOUBLE PRECISION
*
* G (input/output) DOUBLE PRECISION
*
* TAU (input/output) DOUBLE PRECISION
*
* These are passed as arguments in order to save their values
* between calls to DLASQ3.
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION CBIAS
PARAMETER ( CBIAS = 1.50D0 )
DOUBLE PRECISION ZERO, QURTR, HALF, ONE, TWO, HUNDRD
PARAMETER ( ZERO = 0.0D0, QURTR = 0.250D0, HALF = 0.5D0,
$ ONE = 1.0D0, TWO = 2.0D0, HUNDRD = 100.0D0 )
* ..
* .. Local Scalars ..
INTEGER IPN4, J4, N0IN, NN, TTYPE
DOUBLE PRECISION EPS, S, T, TEMP, TOL, TOL2
* ..
* .. External Subroutines ..
EXTERNAL DLASQ4, DLASQ5, DLASQ6
* ..
* .. External Function ..
DOUBLE PRECISION DLAMCH
LOGICAL DISNAN
EXTERNAL DISNAN, DLAMCH
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX, MIN, SQRT
* ..
* .. Executable Statements ..
*
N0IN = N0
EPS = DLAMCH( 'Precision' )
TOL = EPS*HUNDRD
TOL2 = TOL**2
*
* Check for deflation.
*
10 CONTINUE
*
IF( N0.LT.I0 )
$ RETURN
IF( N0.EQ.I0 )
$ GO TO 20
NN = 4*N0 + PP
IF( N0.EQ.( I0+1 ) )
$ GO TO 40
*
* Check whether E(N0-1) is negligible, 1 eigenvalue.
*
IF( Z( NN-5 ).GT.TOL2*( SIGMA+Z( NN-3 ) ) .AND.
$ Z( NN-2*PP-4 ).GT.TOL2*Z( NN-7 ) )
$ GO TO 30
*
20 CONTINUE
*
Z( 4*N0-3 ) = Z( 4*N0+PP-3 ) + SIGMA
N0 = N0 - 1
GO TO 10
*
* Check whether E(N0-2) is negligible, 2 eigenvalues.
*
30 CONTINUE
*
IF( Z( NN-9 ).GT.TOL2*SIGMA .AND.
$ Z( NN-2*PP-8 ).GT.TOL2*Z( NN-11 ) )
$ GO TO 50
*
40 CONTINUE
*
IF( Z( NN-3 ).GT.Z( NN-7 ) ) THEN
S = Z( NN-3 )
Z( NN-3 ) = Z( NN-7 )
Z( NN-7 ) = S
END IF
IF( Z( NN-5 ).GT.Z( NN-3 )*TOL2 ) THEN
T = HALF*( ( Z( NN-7 )-Z( NN-3 ) )+Z( NN-5 ) )
S = Z( NN-3 )*( Z( NN-5 ) / T )
IF( S.LE.T ) THEN
S = Z( NN-3 )*( Z( NN-5 ) /
$ ( T*( ONE+SQRT( ONE+S / T ) ) ) )
ELSE
S = Z( NN-3 )*( Z( NN-5 ) / ( T+SQRT( T )*SQRT( T+S ) ) )
END IF
T = Z( NN-7 ) + ( S+Z( NN-5 ) )
Z( NN-3 ) = Z( NN-3 )*( Z( NN-7 ) / T )
Z( NN-7 ) = T
END IF
Z( 4*N0-7 ) = Z( NN-7 ) + SIGMA
Z( 4*N0-3 ) = Z( NN-3 ) + SIGMA
N0 = N0 - 2
GO TO 10
*
50 CONTINUE
IF( PP.EQ.2 )
$ PP = 0
*
* Reverse the qd-array, if warranted.
*
IF( DMIN.LE.ZERO .OR. N0.LT.N0IN ) THEN
IF( CBIAS*Z( 4*I0+PP-3 ).LT.Z( 4*N0+PP-3 ) ) THEN
IPN4 = 4*( I0+N0 )
DO 60 J4 = 4*I0, 2*( I0+N0-1 ), 4
TEMP = Z( J4-3 )
Z( J4-3 ) = Z( IPN4-J4-3 )
Z( IPN4-J4-3 ) = TEMP
TEMP = Z( J4-2 )
Z( J4-2 ) = Z( IPN4-J4-2 )
Z( IPN4-J4-2 ) = TEMP
TEMP = Z( J4-1 )
Z( J4-1 ) = Z( IPN4-J4-5 )
Z( IPN4-J4-5 ) = TEMP
TEMP = Z( J4 )
Z( J4 ) = Z( IPN4-J4-4 )
Z( IPN4-J4-4 ) = TEMP
60 CONTINUE
IF( N0-I0.LE.4 ) THEN
Z( 4*N0+PP-1 ) = Z( 4*I0+PP-1 )
Z( 4*N0-PP ) = Z( 4*I0-PP )
END IF
DMIN2 = MIN( DMIN2, Z( 4*N0+PP-1 ) )
Z( 4*N0+PP-1 ) = MIN( Z( 4*N0+PP-1 ), Z( 4*I0+PP-1 ),
$ Z( 4*I0+PP+3 ) )
Z( 4*N0-PP ) = MIN( Z( 4*N0-PP ), Z( 4*I0-PP ),
$ Z( 4*I0-PP+4 ) )
QMAX = MAX( QMAX, Z( 4*I0+PP-3 ), Z( 4*I0+PP+1 ) )
DMIN = -ZERO
END IF
END IF
*
* Choose a shift.
*
CALL DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, DN1,
$ DN2, TAU, TTYPE, G )
*
* Call dqds until DMIN > 0.
*
70 CONTINUE
*
CALL DLASQ5( I0, N0, Z, PP, TAU, DMIN, DMIN1, DMIN2, DN,
$ DN1, DN2, IEEE )
*
NDIV = NDIV + ( N0-I0+2 )
ITER = ITER + 1
*
* Check status.
*
IF( DMIN.GE.ZERO .AND. DMIN1.GT.ZERO ) THEN
*
* Success.
*
GO TO 90
*
ELSE IF( DMIN.LT.ZERO .AND. DMIN1.GT.ZERO .AND.
$ Z( 4*( N0-1 )-PP ).LT.TOL*( SIGMA+DN1 ) .AND.
$ ABS( DN ).LT.TOL*SIGMA ) THEN
*
* Convergence hidden by negative DN.
*
Z( 4*( N0-1 )-PP+2 ) = ZERO
DMIN = ZERO
GO TO 90
ELSE IF( DMIN.LT.ZERO ) THEN
*
* TAU too big. Select new TAU and try again.
*
NFAIL = NFAIL + 1
IF( TTYPE.LT.-22 ) THEN
*
* Failed twice. Play it safe.
*
TAU = ZERO
ELSE IF( DMIN1.GT.ZERO ) THEN
*
* Late failure. Gives excellent shift.
*
TAU = ( TAU+DMIN )*( ONE-TWO*EPS )
TTYPE = TTYPE - 11
ELSE
*
* Early failure. Divide by 4.
*
TAU = QURTR*TAU
TTYPE = TTYPE - 12
END IF
GO TO 70
ELSE IF( DISNAN( DMIN ) ) THEN
*
* NaN.
*
IF( TAU.EQ.ZERO ) THEN
GO TO 80
ELSE
TAU = ZERO
GO TO 70
END IF
ELSE
*
* Possible underflow. Play it safe.
*
GO TO 80
END IF
*
* Risk of underflow.
*
80 CONTINUE
CALL DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, DN1, DN2 )
NDIV = NDIV + ( N0-I0+2 )
ITER = ITER + 1
TAU = ZERO
*
90 CONTINUE
IF( TAU.LT.SIGMA ) THEN
DESIG = DESIG + TAU
T = SIGMA + DESIG
DESIG = DESIG - ( T-SIGMA )
ELSE
T = SIGMA + TAU
DESIG = SIGMA - ( T-TAU ) + DESIG
END IF
SIGMA = T
*
RETURN
*
* End of DLASQ3
*
END
SUBROUTINE DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN,
$ DN1, DN2, TAU, TTYPE, G )
*
* -- LAPACK routine (version 3.3.1) --
*
* -- Contributed by Osni Marques of the Lawrence Berkeley National --
* -- Laboratory and Beresford Parlett of the Univ. of California at --
* -- Berkeley --
* -- November 2008 --
*
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER I0, N0, N0IN, PP, TTYPE
DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, TAU
* ..
* .. Array Arguments ..
DOUBLE PRECISION Z( * )
* ..
*
* Purpose
* =======
*
* DLASQ4 computes an approximation TAU to the smallest eigenvalue
* using values of d from the previous transform.
*
* Arguments
* =========
*
* I0 (input) INTEGER
* First index.
*
* N0 (input) INTEGER
* Last index.
*
* Z (input) DOUBLE PRECISION array, dimension ( 4*N )
* Z holds the qd array.
*
* PP (input) INTEGER
* PP=0 for ping, PP=1 for pong.
*
* NOIN (input) INTEGER
* The value of N0 at start of EIGTEST.
*
* DMIN (input) DOUBLE PRECISION
* Minimum value of d.
*
* DMIN1 (input) DOUBLE PRECISION
* Minimum value of d, excluding D( N0 ).
*
* DMIN2 (input) DOUBLE PRECISION
* Minimum value of d, excluding D( N0 ) and D( N0-1 ).
*
* DN (input) DOUBLE PRECISION
* d(N)
*
* DN1 (input) DOUBLE PRECISION
* d(N-1)
*
* DN2 (input) DOUBLE PRECISION
* d(N-2)
*
* TAU (output) DOUBLE PRECISION
* This is the shift.
*
* TTYPE (output) INTEGER
* Shift type.
*
* G (input/output) REAL
* G is passed as an argument in order to save its value between
* calls to DLASQ4.
*
* Further Details
* ===============
* CNST1 = 9/16
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION CNST1, CNST2, CNST3
PARAMETER ( CNST1 = 0.5630D0, CNST2 = 1.010D0,
$ CNST3 = 1.050D0 )
DOUBLE PRECISION QURTR, THIRD, HALF, ZERO, ONE, TWO, HUNDRD
PARAMETER ( QURTR = 0.250D0, THIRD = 0.3330D0,
$ HALF = 0.50D0, ZERO = 0.0D0, ONE = 1.0D0,
$ TWO = 2.0D0, HUNDRD = 100.0D0 )
* ..
* .. Local Scalars ..
INTEGER I4, NN, NP
DOUBLE PRECISION A2, B1, B2, GAM, GAP1, GAP2, S
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN, SQRT
* ..
* .. Executable Statements ..
*
* A negative DMIN forces the shift to take that absolute value
* TTYPE records the type of shift.
*
IF( DMIN.LE.ZERO ) THEN
TAU = -DMIN
TTYPE = -1
RETURN
END IF
*
NN = 4*N0 + PP
IF( N0IN.EQ.N0 ) THEN
*
* No eigenvalues deflated.
*
IF( DMIN.EQ.DN .OR. DMIN.EQ.DN1 ) THEN
*
B1 = SQRT( Z( NN-3 ) )*SQRT( Z( NN-5 ) )
B2 = SQRT( Z( NN-7 ) )*SQRT( Z( NN-9 ) )
A2 = Z( NN-7 ) + Z( NN-5 )
*
* Cases 2 and 3.
*
IF( DMIN.EQ.DN .AND. DMIN1.EQ.DN1 ) THEN
GAP2 = DMIN2 - A2 - DMIN2*QURTR
IF( GAP2.GT.ZERO .AND. GAP2.GT.B2 ) THEN
GAP1 = A2 - DN - ( B2 / GAP2 )*B2
ELSE
GAP1 = A2 - DN - ( B1+B2 )
END IF
IF( GAP1.GT.ZERO .AND. GAP1.GT.B1 ) THEN
S = MAX( DN-( B1 / GAP1 )*B1, HALF*DMIN )
TTYPE = -2
ELSE
S = ZERO
IF( DN.GT.B1 )
$ S = DN - B1
IF( A2.GT.( B1+B2 ) )
$ S = MIN( S, A2-( B1+B2 ) )
S = MAX( S, THIRD*DMIN )
TTYPE = -3
END IF
ELSE
*
* Case 4.
*
TTYPE =