1 SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
2 $ WORK, LWORK, INFO )
3 *
4 * -- LAPACK driver routine (version 3.3.1) --
5 * -- LAPACK is a software package provided by Univ. of Tennessee, --
6 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
7 * -- April 2011 --
8 *
9 * .. Scalar Arguments ..
10 CHARACTER JOBU, JOBVT
11 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
12 * ..
13 * .. Array Arguments ..
14 DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
15 $ VT( LDVT, * ), WORK( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * DGESVD computes the singular value decomposition (SVD) of a real
22 * M-by-N matrix A, optionally computing the left and/or right singular
23 * vectors. The SVD is written
24 *
25 * A = U * SIGMA * transpose(V)
26 *
27 * where SIGMA is an M-by-N matrix which is zero except for its
28 * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
29 * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
30 * are the singular values of A; they are real and non-negative, and
31 * are returned in descending order. The first min(m,n) columns of
32 * U and V are the left and right singular vectors of A.
33 *
34 * Note that the routine returns V**T, not V.
35 *
36 * Arguments
37 * =========
38 *
39 * JOBU (input) CHARACTER*1
40 * Specifies options for computing all or part of the matrix U:
41 * = 'A': all M columns of U are returned in array U:
42 * = 'S': the first min(m,n) columns of U (the left singular
43 * vectors) are returned in the array U;
44 * = 'O': the first min(m,n) columns of U (the left singular
45 * vectors) are overwritten on the array A;
46 * = 'N': no columns of U (no left singular vectors) are
47 * computed.
48 *
49 * JOBVT (input) CHARACTER*1
50 * Specifies options for computing all or part of the matrix
51 * V**T:
52 * = 'A': all N rows of V**T are returned in the array VT;
53 * = 'S': the first min(m,n) rows of V**T (the right singular
54 * vectors) are returned in the array VT;
55 * = 'O': the first min(m,n) rows of V**T (the right singular
56 * vectors) are overwritten on the array A;
57 * = 'N': no rows of V**T (no right singular vectors) are
58 * computed.
59 *
60 * JOBVT and JOBU cannot both be 'O'.
61 *
62 * M (input) INTEGER
63 * The number of rows of the input matrix A. M >= 0.
64 *
65 * N (input) INTEGER
66 * The number of columns of the input matrix A. N >= 0.
67 *
68 * A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
69 * On entry, the M-by-N matrix A.
70 * On exit,
71 * if JOBU = 'O', A is overwritten with the first min(m,n)
72 * columns of U (the left singular vectors,
73 * stored columnwise);
74 * if JOBVT = 'O', A is overwritten with the first min(m,n)
75 * rows of V**T (the right singular vectors,
76 * stored rowwise);
77 * if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
78 * are destroyed.
79 *
80 * LDA (input) INTEGER
81 * The leading dimension of the array A. LDA >= max(1,M).
82 *
83 * S (output) DOUBLE PRECISION array, dimension (min(M,N))
84 * The singular values of A, sorted so that S(i) >= S(i+1).
85 *
86 * U (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
87 * (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
88 * If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
89 * if JOBU = 'S', U contains the first min(m,n) columns of U
90 * (the left singular vectors, stored columnwise);
91 * if JOBU = 'N' or 'O', U is not referenced.
92 *
93 * LDU (input) INTEGER
94 * The leading dimension of the array U. LDU >= 1; if
95 * JOBU = 'S' or 'A', LDU >= M.
96 *
97 * VT (output) DOUBLE PRECISION array, dimension (LDVT,N)
98 * If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
99 * V**T;
100 * if JOBVT = 'S', VT contains the first min(m,n) rows of
101 * V**T (the right singular vectors, stored rowwise);
102 * if JOBVT = 'N' or 'O', VT is not referenced.
103 *
104 * LDVT (input) INTEGER
105 * The leading dimension of the array VT. LDVT >= 1; if
106 * JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
107 *
108 * WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
109 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
110 * if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
111 * superdiagonal elements of an upper bidiagonal matrix B
112 * whose diagonal is in S (not necessarily sorted). B
113 * satisfies A = U * B * VT, so it has the same singular values
114 * as A, and singular vectors related by U and VT.
115 *
116 * LWORK (input) INTEGER
117 * The dimension of the array WORK.
118 * LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
119 * - PATH 1 (M much larger than N, JOBU='N')
120 * - PATH 1t (N much larger than M, JOBVT='N')
121 * LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
122 * For good performance, LWORK should generally be larger.
123 *
124 * If LWORK = -1, then a workspace query is assumed; the routine
125 * only calculates the optimal size of the WORK array, returns
126 * this value as the first entry of the WORK array, and no error
127 * message related to LWORK is issued by XERBLA.
128 *
129 * INFO (output) INTEGER
130 * = 0: successful exit.
131 * < 0: if INFO = -i, the i-th argument had an illegal value.
132 * > 0: if DBDSQR did not converge, INFO specifies how many
133 * superdiagonals of an intermediate bidiagonal form B
134 * did not converge to zero. See the description of WORK
135 * above for details.
136 *
137 * =====================================================================
138 *
139 * .. Parameters ..
140 DOUBLE PRECISION ZERO, ONE
141 PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
142 * ..
143 * .. Local Scalars ..
144 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
145 $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
146 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
147 $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
148 $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
149 $ NRVT, WRKBL
150 DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
151 * ..
152 * .. Local Arrays ..
153 DOUBLE PRECISION DUM( 1 )
154 * ..
155 * .. External Subroutines ..
156 EXTERNAL DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
157 $ DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
158 $ XERBLA
159 * ..
160 * .. External Functions ..
161 LOGICAL LSAME
162 INTEGER ILAENV
163 DOUBLE PRECISION DLAMCH, DLANGE
164 EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE
165 * ..
166 * .. Intrinsic Functions ..
167 INTRINSIC MAX, MIN, SQRT
168 * ..
169 * .. Executable Statements ..
170 *
171 * Test the input arguments
172 *
173 INFO = 0
174 MINMN = MIN( M, N )
175 WNTUA = LSAME( JOBU, 'A' )
176 WNTUS = LSAME( JOBU, 'S' )
177 WNTUAS = WNTUA .OR. WNTUS
178 WNTUO = LSAME( JOBU, 'O' )
179 WNTUN = LSAME( JOBU, 'N' )
180 WNTVA = LSAME( JOBVT, 'A' )
181 WNTVS = LSAME( JOBVT, 'S' )
182 WNTVAS = WNTVA .OR. WNTVS
183 WNTVO = LSAME( JOBVT, 'O' )
184 WNTVN = LSAME( JOBVT, 'N' )
185 LQUERY = ( LWORK.EQ.-1 )
186 *
187 IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
188 INFO = -1
189 ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
190 $ ( WNTVO .AND. WNTUO ) ) THEN
191 INFO = -2
192 ELSE IF( M.LT.0 ) THEN
193 INFO = -3
194 ELSE IF( N.LT.0 ) THEN
195 INFO = -4
196 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
197 INFO = -6
198 ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
199 INFO = -9
200 ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
201 $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
202 INFO = -11
203 END IF
204 *
205 * Compute workspace
206 * (Note: Comments in the code beginning "Workspace:" describe the
207 * minimal amount of workspace needed at that point in the code,
208 * as well as the preferred amount for good performance.
209 * NB refers to the optimal block size for the immediately
210 * following subroutine, as returned by ILAENV.)
211 *
212 IF( INFO.EQ.0 ) THEN
213 MINWRK = 1
214 MAXWRK = 1
215 IF( M.GE.N .AND. MINMN.GT.0 ) THEN
216 *
217 * Compute space needed for DBDSQR
218 *
219 MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
220 BDSPAC = 5*N
221 IF( M.GE.MNTHR ) THEN
222 IF( WNTUN ) THEN
223 *
224 * Path 1 (M much larger than N, JOBU='N')
225 *
226 MAXWRK = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1,
227 $ -1 )
228 MAXWRK = MAX( MAXWRK, 3*N+2*N*
229 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
230 IF( WNTVO .OR. WNTVAS )
231 $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
232 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
233 MAXWRK = MAX( MAXWRK, BDSPAC )
234 MINWRK = MAX( 4*N, BDSPAC )
235 ELSE IF( WNTUO .AND. WNTVN ) THEN
236 *
237 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
238 *
239 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
240 WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
241 $ N, N, -1 ) )
242 WRKBL = MAX( WRKBL, 3*N+2*N*
243 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
244 WRKBL = MAX( WRKBL, 3*N+N*
245 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
246 WRKBL = MAX( WRKBL, BDSPAC )
247 MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
248 MINWRK = MAX( 3*N+M, BDSPAC )
249 ELSE IF( WNTUO .AND. WNTVAS ) THEN
250 *
251 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
252 * 'A')
253 *
254 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
255 WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
256 $ N, N, -1 ) )
257 WRKBL = MAX( WRKBL, 3*N+2*N*
258 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
259 WRKBL = MAX( WRKBL, 3*N+N*
260 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
261 WRKBL = MAX( WRKBL, 3*N+( N-1 )*
262 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
263 WRKBL = MAX( WRKBL, BDSPAC )
264 MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
265 MINWRK = MAX( 3*N+M, BDSPAC )
266 ELSE IF( WNTUS .AND. WNTVN ) THEN
267 *
268 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
269 *
270 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
271 WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
272 $ N, N, -1 ) )
273 WRKBL = MAX( WRKBL, 3*N+2*N*
274 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
275 WRKBL = MAX( WRKBL, 3*N+N*
276 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
277 WRKBL = MAX( WRKBL, BDSPAC )
278 MAXWRK = N*N + WRKBL
279 MINWRK = MAX( 3*N+M, BDSPAC )
280 ELSE IF( WNTUS .AND. WNTVO ) THEN
281 *
282 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
283 *
284 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
285 WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
286 $ N, N, -1 ) )
287 WRKBL = MAX( WRKBL, 3*N+2*N*
288 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
289 WRKBL = MAX( WRKBL, 3*N+N*
290 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
291 WRKBL = MAX( WRKBL, 3*N+( N-1 )*
292 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
293 WRKBL = MAX( WRKBL, BDSPAC )
294 MAXWRK = 2*N*N + WRKBL
295 MINWRK = MAX( 3*N+M, BDSPAC )
296 ELSE IF( WNTUS .AND. WNTVAS ) THEN
297 *
298 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
299 * 'A')
300 *
301 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
302 WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
303 $ N, N, -1 ) )
304 WRKBL = MAX( WRKBL, 3*N+2*N*
305 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
306 WRKBL = MAX( WRKBL, 3*N+N*
307 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
308 WRKBL = MAX( WRKBL, 3*N+( N-1 )*
309 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
310 WRKBL = MAX( WRKBL, BDSPAC )
311 MAXWRK = N*N + WRKBL
312 MINWRK = MAX( 3*N+M, BDSPAC )
313 ELSE IF( WNTUA .AND. WNTVN ) THEN
314 *
315 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
316 *
317 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
318 WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
319 $ M, N, -1 ) )
320 WRKBL = MAX( WRKBL, 3*N+2*N*
321 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
322 WRKBL = MAX( WRKBL, 3*N+N*
323 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
324 WRKBL = MAX( WRKBL, BDSPAC )
325 MAXWRK = N*N + WRKBL
326 MINWRK = MAX( 3*N+M, BDSPAC )
327 ELSE IF( WNTUA .AND. WNTVO ) THEN
328 *
329 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
330 *
331 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
332 WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
333 $ M, N, -1 ) )
334 WRKBL = MAX( WRKBL, 3*N+2*N*
335 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
336 WRKBL = MAX( WRKBL, 3*N+N*
337 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
338 WRKBL = MAX( WRKBL, 3*N+( N-1 )*
339 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
340 WRKBL = MAX( WRKBL, BDSPAC )
341 MAXWRK = 2*N*N + WRKBL
342 MINWRK = MAX( 3*N+M, BDSPAC )
343 ELSE IF( WNTUA .AND. WNTVAS ) THEN
344 *
345 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
346 * 'A')
347 *
348 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
349 WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
350 $ M, N, -1 ) )
351 WRKBL = MAX( WRKBL, 3*N+2*N*
352 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
353 WRKBL = MAX( WRKBL, 3*N+N*
354 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
355 WRKBL = MAX( WRKBL, 3*N+( N-1 )*
356 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
357 WRKBL = MAX( WRKBL, BDSPAC )
358 MAXWRK = N*N + WRKBL
359 MINWRK = MAX( 3*N+M, BDSPAC )
360 END IF
361 ELSE
362 *
363 * Path 10 (M at least N, but not much larger)
364 *
365 MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
366 $ -1, -1 )
367 IF( WNTUS .OR. WNTUO )
368 $ MAXWRK = MAX( MAXWRK, 3*N+N*
369 $ ILAENV( 1, 'DORGBR', 'Q', M, N, N, -1 ) )
370 IF( WNTUA )
371 $ MAXWRK = MAX( MAXWRK, 3*N+M*
372 $ ILAENV( 1, 'DORGBR', 'Q', M, M, N, -1 ) )
373 IF( .NOT.WNTVN )
374 $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
375 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
376 MAXWRK = MAX( MAXWRK, BDSPAC )
377 MINWRK = MAX( 3*N+M, BDSPAC )
378 END IF
379 ELSE IF( MINMN.GT.0 ) THEN
380 *
381 * Compute space needed for DBDSQR
382 *
383 MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
384 BDSPAC = 5*M
385 IF( N.GE.MNTHR ) THEN
386 IF( WNTVN ) THEN
387 *
388 * Path 1t(N much larger than M, JOBVT='N')
389 *
390 MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1,
391 $ -1 )
392 MAXWRK = MAX( MAXWRK, 3*M+2*M*
393 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
394 IF( WNTUO .OR. WNTUAS )
395 $ MAXWRK = MAX( MAXWRK, 3*M+M*
396 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
397 MAXWRK = MAX( MAXWRK, BDSPAC )
398 MINWRK = MAX( 4*M, BDSPAC )
399 ELSE IF( WNTVO .AND. WNTUN ) THEN
400 *
401 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
402 *
403 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
404 WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
405 $ N, M, -1 ) )
406 WRKBL = MAX( WRKBL, 3*M+2*M*
407 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
408 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
409 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
410 WRKBL = MAX( WRKBL, BDSPAC )
411 MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
412 MINWRK = MAX( 3*M+N, BDSPAC )
413 ELSE IF( WNTVO .AND. WNTUAS ) THEN
414 *
415 * Path 3t(N much larger than M, JOBU='S' or 'A',
416 * JOBVT='O')
417 *
418 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
419 WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
420 $ N, M, -1 ) )
421 WRKBL = MAX( WRKBL, 3*M+2*M*
422 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
423 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
424 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
425 WRKBL = MAX( WRKBL, 3*M+M*
426 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
427 WRKBL = MAX( WRKBL, BDSPAC )
428 MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
429 MINWRK = MAX( 3*M+N, BDSPAC )
430 ELSE IF( WNTVS .AND. WNTUN ) THEN
431 *
432 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
433 *
434 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
435 WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
436 $ N, M, -1 ) )
437 WRKBL = MAX( WRKBL, 3*M+2*M*
438 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
439 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
440 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
441 WRKBL = MAX( WRKBL, BDSPAC )
442 MAXWRK = M*M + WRKBL
443 MINWRK = MAX( 3*M+N, BDSPAC )
444 ELSE IF( WNTVS .AND. WNTUO ) THEN
445 *
446 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
447 *
448 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
449 WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
450 $ N, M, -1 ) )
451 WRKBL = MAX( WRKBL, 3*M+2*M*
452 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
453 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
454 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
455 WRKBL = MAX( WRKBL, 3*M+M*
456 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
457 WRKBL = MAX( WRKBL, BDSPAC )
458 MAXWRK = 2*M*M + WRKBL
459 MINWRK = MAX( 3*M+N, BDSPAC )
460 ELSE IF( WNTVS .AND. WNTUAS ) THEN
461 *
462 * Path 6t(N much larger than M, JOBU='S' or 'A',
463 * JOBVT='S')
464 *
465 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
466 WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
467 $ N, M, -1 ) )
468 WRKBL = MAX( WRKBL, 3*M+2*M*
469 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
470 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
471 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
472 WRKBL = MAX( WRKBL, 3*M+M*
473 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
474 WRKBL = MAX( WRKBL, BDSPAC )
475 MAXWRK = M*M + WRKBL
476 MINWRK = MAX( 3*M+N, BDSPAC )
477 ELSE IF( WNTVA .AND. WNTUN ) THEN
478 *
479 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
480 *
481 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
482 WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
483 $ N, M, -1 ) )
484 WRKBL = MAX( WRKBL, 3*M+2*M*
485 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
486 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
487 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
488 WRKBL = MAX( WRKBL, BDSPAC )
489 MAXWRK = M*M + WRKBL
490 MINWRK = MAX( 3*M+N, BDSPAC )
491 ELSE IF( WNTVA .AND. WNTUO ) THEN
492 *
493 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
494 *
495 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
496 WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
497 $ N, M, -1 ) )
498 WRKBL = MAX( WRKBL, 3*M+2*M*
499 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
500 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
501 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
502 WRKBL = MAX( WRKBL, 3*M+M*
503 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
504 WRKBL = MAX( WRKBL, BDSPAC )
505 MAXWRK = 2*M*M + WRKBL
506 MINWRK = MAX( 3*M+N, BDSPAC )
507 ELSE IF( WNTVA .AND. WNTUAS ) THEN
508 *
509 * Path 9t(N much larger than M, JOBU='S' or 'A',
510 * JOBVT='A')
511 *
512 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
513 WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
514 $ N, M, -1 ) )
515 WRKBL = MAX( WRKBL, 3*M+2*M*
516 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
517 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
518 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
519 WRKBL = MAX( WRKBL, 3*M+M*
520 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
521 WRKBL = MAX( WRKBL, BDSPAC )
522 MAXWRK = M*M + WRKBL
523 MINWRK = MAX( 3*M+N, BDSPAC )
524 END IF
525 ELSE
526 *
527 * Path 10t(N greater than M, but not much larger)
528 *
529 MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
530 $ -1, -1 )
531 IF( WNTVS .OR. WNTVO )
532 $ MAXWRK = MAX( MAXWRK, 3*M+M*
533 $ ILAENV( 1, 'DORGBR', 'P', M, N, M, -1 ) )
534 IF( WNTVA )
535 $ MAXWRK = MAX( MAXWRK, 3*M+N*
536 $ ILAENV( 1, 'DORGBR', 'P', N, N, M, -1 ) )
537 IF( .NOT.WNTUN )
538 $ MAXWRK = MAX( MAXWRK, 3*M+( M-1 )*
539 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
540 MAXWRK = MAX( MAXWRK, BDSPAC )
541 MINWRK = MAX( 3*M+N, BDSPAC )
542 END IF
543 END IF
544 MAXWRK = MAX( MAXWRK, MINWRK )
545 WORK( 1 ) = MAXWRK
546 *
547 IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
548 INFO = -13
549 END IF
550 END IF
551 *
552 IF( INFO.NE.0 ) THEN
553 CALL XERBLA( 'DGESVD', -INFO )
554 RETURN
555 ELSE IF( LQUERY ) THEN
556 RETURN
557 END IF
558 *
559 * Quick return if possible
560 *
561 IF( M.EQ.0 .OR. N.EQ.0 ) THEN
562 RETURN
563 END IF
564 *
565 * Get machine constants
566 *
567 EPS = DLAMCH( 'P' )
568 SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
569 BIGNUM = ONE / SMLNUM
570 *
571 * Scale A if max element outside range [SMLNUM,BIGNUM]
572 *
573 ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
574 ISCL = 0
575 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
576 ISCL = 1
577 CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
578 ELSE IF( ANRM.GT.BIGNUM ) THEN
579 ISCL = 1
580 CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
581 END IF
582 *
583 IF( M.GE.N ) THEN
584 *
585 * A has at least as many rows as columns. If A has sufficiently
586 * more rows than columns, first reduce using the QR
587 * decomposition (if sufficient workspace available)
588 *
589 IF( M.GE.MNTHR ) THEN
590 *
591 IF( WNTUN ) THEN
592 *
593 * Path 1 (M much larger than N, JOBU='N')
594 * No left singular vectors to be computed
595 *
596 ITAU = 1
597 IWORK = ITAU + N
598 *
599 * Compute A=Q*R
600 * (Workspace: need 2*N, prefer N+N*NB)
601 *
602 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
603 $ LWORK-IWORK+1, IERR )
604 *
605 * Zero out below R
606 *
607 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
608 IE = 1
609 ITAUQ = IE + N
610 ITAUP = ITAUQ + N
611 IWORK = ITAUP + N
612 *
613 * Bidiagonalize R in A
614 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
615 *
616 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
617 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
618 $ IERR )
619 NCVT = 0
620 IF( WNTVO .OR. WNTVAS ) THEN
621 *
622 * If right singular vectors desired, generate P'.
623 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
624 *
625 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
626 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
627 NCVT = N
628 END IF
629 IWORK = IE + N
630 *
631 * Perform bidiagonal QR iteration, computing right
632 * singular vectors of A in A if desired
633 * (Workspace: need BDSPAC)
634 *
635 CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
636 $ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
637 *
638 * If right singular vectors desired in VT, copy them there
639 *
640 IF( WNTVAS )
641 $ CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
642 *
643 ELSE IF( WNTUO .AND. WNTVN ) THEN
644 *
645 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
646 * N left singular vectors to be overwritten on A and
647 * no right singular vectors to be computed
648 *
649 IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
650 *
651 * Sufficient workspace for a fast algorithm
652 *
653 IR = 1
654 IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
655 *
656 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
657 *
658 LDWRKU = LDA
659 LDWRKR = LDA
660 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
661 *
662 * WORK(IU) is LDA by N, WORK(IR) is N by N
663 *
664 LDWRKU = LDA
665 LDWRKR = N
666 ELSE
667 *
668 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
669 *
670 LDWRKU = ( LWORK-N*N-N ) / N
671 LDWRKR = N
672 END IF
673 ITAU = IR + LDWRKR*N
674 IWORK = ITAU + N
675 *
676 * Compute A=Q*R
677 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
678 *
679 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
680 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
681 *
682 * Copy R to WORK(IR) and zero out below it
683 *
684 CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
685 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
686 $ LDWRKR )
687 *
688 * Generate Q in A
689 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
690 *
691 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
692 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
693 IE = ITAU
694 ITAUQ = IE + N
695 ITAUP = ITAUQ + N
696 IWORK = ITAUP + N
697 *
698 * Bidiagonalize R in WORK(IR)
699 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
700 *
701 CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
702 $ WORK( ITAUQ ), WORK( ITAUP ),
703 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
704 *
705 * Generate left vectors bidiagonalizing R
706 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
707 *
708 CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
709 $ WORK( ITAUQ ), WORK( IWORK ),
710 $ LWORK-IWORK+1, IERR )
711 IWORK = IE + N
712 *
713 * Perform bidiagonal QR iteration, computing left
714 * singular vectors of R in WORK(IR)
715 * (Workspace: need N*N+BDSPAC)
716 *
717 CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
718 $ WORK( IR ), LDWRKR, DUM, 1,
719 $ WORK( IWORK ), INFO )
720 IU = IE + N
721 *
722 * Multiply Q in A by left singular vectors of R in
723 * WORK(IR), storing result in WORK(IU) and copying to A
724 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
725 *
726 DO 10 I = 1, M, LDWRKU
727 CHUNK = MIN( M-I+1, LDWRKU )
728 CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
729 $ LDA, WORK( IR ), LDWRKR, ZERO,
730 $ WORK( IU ), LDWRKU )
731 CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
732 $ A( I, 1 ), LDA )
733 10 CONTINUE
734 *
735 ELSE
736 *
737 * Insufficient workspace for a fast algorithm
738 *
739 IE = 1
740 ITAUQ = IE + N
741 ITAUP = ITAUQ + N
742 IWORK = ITAUP + N
743 *
744 * Bidiagonalize A
745 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
746 *
747 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
748 $ WORK( ITAUQ ), WORK( ITAUP ),
749 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
750 *
751 * Generate left vectors bidiagonalizing A
752 * (Workspace: need 4*N, prefer 3*N+N*NB)
753 *
754 CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
755 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
756 IWORK = IE + N
757 *
758 * Perform bidiagonal QR iteration, computing left
759 * singular vectors of A in A
760 * (Workspace: need BDSPAC)
761 *
762 CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
763 $ A, LDA, DUM, 1, WORK( IWORK ), INFO )
764 *
765 END IF
766 *
767 ELSE IF( WNTUO .AND. WNTVAS ) THEN
768 *
769 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
770 * N left singular vectors to be overwritten on A and
771 * N right singular vectors to be computed in VT
772 *
773 IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
774 *
775 * Sufficient workspace for a fast algorithm
776 *
777 IR = 1
778 IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
779 *
780 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
781 *
782 LDWRKU = LDA
783 LDWRKR = LDA
784 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
785 *
786 * WORK(IU) is LDA by N and WORK(IR) is N by N
787 *
788 LDWRKU = LDA
789 LDWRKR = N
790 ELSE
791 *
792 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
793 *
794 LDWRKU = ( LWORK-N*N-N ) / N
795 LDWRKR = N
796 END IF
797 ITAU = IR + LDWRKR*N
798 IWORK = ITAU + N
799 *
800 * Compute A=Q*R
801 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
802 *
803 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
804 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
805 *
806 * Copy R to VT, zeroing out below it
807 *
808 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
809 IF( N.GT.1 )
810 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
811 $ VT( 2, 1 ), LDVT )
812 *
813 * Generate Q in A
814 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
815 *
816 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
817 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
818 IE = ITAU
819 ITAUQ = IE + N
820 ITAUP = ITAUQ + N
821 IWORK = ITAUP + N
822 *
823 * Bidiagonalize R in VT, copying result to WORK(IR)
824 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
825 *
826 CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
827 $ WORK( ITAUQ ), WORK( ITAUP ),
828 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
829 CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
830 *
831 * Generate left vectors bidiagonalizing R in WORK(IR)
832 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
833 *
834 CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
835 $ WORK( ITAUQ ), WORK( IWORK ),
836 $ LWORK-IWORK+1, IERR )
837 *
838 * Generate right vectors bidiagonalizing R in VT
839 * (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
840 *
841 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
842 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
843 IWORK = IE + N
844 *
845 * Perform bidiagonal QR iteration, computing left
846 * singular vectors of R in WORK(IR) and computing right
847 * singular vectors of R in VT
848 * (Workspace: need N*N+BDSPAC)
849 *
850 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
851 $ WORK( IR ), LDWRKR, DUM, 1,
852 $ WORK( IWORK ), INFO )
853 IU = IE + N
854 *
855 * Multiply Q in A by left singular vectors of R in
856 * WORK(IR), storing result in WORK(IU) and copying to A
857 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
858 *
859 DO 20 I = 1, M, LDWRKU
860 CHUNK = MIN( M-I+1, LDWRKU )
861 CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
862 $ LDA, WORK( IR ), LDWRKR, ZERO,
863 $ WORK( IU ), LDWRKU )
864 CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
865 $ A( I, 1 ), LDA )
866 20 CONTINUE
867 *
868 ELSE
869 *
870 * Insufficient workspace for a fast algorithm
871 *
872 ITAU = 1
873 IWORK = ITAU + N
874 *
875 * Compute A=Q*R
876 * (Workspace: need 2*N, prefer N+N*NB)
877 *
878 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
879 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
880 *
881 * Copy R to VT, zeroing out below it
882 *
883 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
884 IF( N.GT.1 )
885 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
886 $ VT( 2, 1 ), LDVT )
887 *
888 * Generate Q in A
889 * (Workspace: need 2*N, prefer N+N*NB)
890 *
891 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
892 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
893 IE = ITAU
894 ITAUQ = IE + N
895 ITAUP = ITAUQ + N
896 IWORK = ITAUP + N
897 *
898 * Bidiagonalize R in VT
899 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
900 *
901 CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
902 $ WORK( ITAUQ ), WORK( ITAUP ),
903 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
904 *
905 * Multiply Q in A by left vectors bidiagonalizing R
906 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
907 *
908 CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
909 $ WORK( ITAUQ ), A, LDA, WORK( IWORK ),
910 $ LWORK-IWORK+1, IERR )
911 *
912 * Generate right vectors bidiagonalizing R in VT
913 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
914 *
915 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
916 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
917 IWORK = IE + N
918 *
919 * Perform bidiagonal QR iteration, computing left
920 * singular vectors of A in A and computing right
921 * singular vectors of A in VT
922 * (Workspace: need BDSPAC)
923 *
924 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
925 $ A, LDA, DUM, 1, WORK( IWORK ), INFO )
926 *
927 END IF
928 *
929 ELSE IF( WNTUS ) THEN
930 *
931 IF( WNTVN ) THEN
932 *
933 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
934 * N left singular vectors to be computed in U and
935 * no right singular vectors to be computed
936 *
937 IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
938 *
939 * Sufficient workspace for a fast algorithm
940 *
941 IR = 1
942 IF( LWORK.GE.WRKBL+LDA*N ) THEN
943 *
944 * WORK(IR) is LDA by N
945 *
946 LDWRKR = LDA
947 ELSE
948 *
949 * WORK(IR) is N by N
950 *
951 LDWRKR = N
952 END IF
953 ITAU = IR + LDWRKR*N
954 IWORK = ITAU + N
955 *
956 * Compute A=Q*R
957 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
958 *
959 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
960 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
961 *
962 * Copy R to WORK(IR), zeroing out below it
963 *
964 CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
965 $ LDWRKR )
966 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
967 $ WORK( IR+1 ), LDWRKR )
968 *
969 * Generate Q in A
970 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
971 *
972 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
973 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
974 IE = ITAU
975 ITAUQ = IE + N
976 ITAUP = ITAUQ + N
977 IWORK = ITAUP + N
978 *
979 * Bidiagonalize R in WORK(IR)
980 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
981 *
982 CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
983 $ WORK( IE ), WORK( ITAUQ ),
984 $ WORK( ITAUP ), WORK( IWORK ),
985 $ LWORK-IWORK+1, IERR )
986 *
987 * Generate left vectors bidiagonalizing R in WORK(IR)
988 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
989 *
990 CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
991 $ WORK( ITAUQ ), WORK( IWORK ),
992 $ LWORK-IWORK+1, IERR )
993 IWORK = IE + N
994 *
995 * Perform bidiagonal QR iteration, computing left
996 * singular vectors of R in WORK(IR)
997 * (Workspace: need N*N+BDSPAC)
998 *
999 CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1000 $ 1, WORK( IR ), LDWRKR, DUM, 1,
1001 $ WORK( IWORK ), INFO )
1002 *
1003 * Multiply Q in A by left singular vectors of R in
1004 * WORK(IR), storing result in U
1005 * (Workspace: need N*N)
1006 *
1007 CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1008 $ WORK( IR ), LDWRKR, ZERO, U, LDU )
1009 *
1010 ELSE
1011 *
1012 * Insufficient workspace for a fast algorithm
1013 *
1014 ITAU = 1
1015 IWORK = ITAU + N
1016 *
1017 * Compute A=Q*R, copying result to U
1018 * (Workspace: need 2*N, prefer N+N*NB)
1019 *
1020 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1021 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1022 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1023 *
1024 * Generate Q in U
1025 * (Workspace: need 2*N, prefer N+N*NB)
1026 *
1027 CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1028 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1029 IE = ITAU
1030 ITAUQ = IE + N
1031 ITAUP = ITAUQ + N
1032 IWORK = ITAUP + N
1033 *
1034 * Zero out below R in A
1035 *
1036 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1037 $ LDA )
1038 *
1039 * Bidiagonalize R in A
1040 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1041 *
1042 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1043 $ WORK( ITAUQ ), WORK( ITAUP ),
1044 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1045 *
1046 * Multiply Q in U by left vectors bidiagonalizing R
1047 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1048 *
1049 CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1050 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1051 $ LWORK-IWORK+1, IERR )
1052 IWORK = IE + N
1053 *
1054 * Perform bidiagonal QR iteration, computing left
1055 * singular vectors of A in U
1056 * (Workspace: need BDSPAC)
1057 *
1058 CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1059 $ 1, U, LDU, DUM, 1, WORK( IWORK ),
1060 $ INFO )
1061 *
1062 END IF
1063 *
1064 ELSE IF( WNTVO ) THEN
1065 *
1066 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1067 * N left singular vectors to be computed in U and
1068 * N right singular vectors to be overwritten on A
1069 *
1070 IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
1071 *
1072 * Sufficient workspace for a fast algorithm
1073 *
1074 IU = 1
1075 IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1076 *
1077 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1078 *
1079 LDWRKU = LDA
1080 IR = IU + LDWRKU*N
1081 LDWRKR = LDA
1082 ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1083 *
1084 * WORK(IU) is LDA by N and WORK(IR) is N by N
1085 *
1086 LDWRKU = LDA
1087 IR = IU + LDWRKU*N
1088 LDWRKR = N
1089 ELSE
1090 *
1091 * WORK(IU) is N by N and WORK(IR) is N by N
1092 *
1093 LDWRKU = N
1094 IR = IU + LDWRKU*N
1095 LDWRKR = N
1096 END IF
1097 ITAU = IR + LDWRKR*N
1098 IWORK = ITAU + N
1099 *
1100 * Compute A=Q*R
1101 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1102 *
1103 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1104 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1105 *
1106 * Copy R to WORK(IU), zeroing out below it
1107 *
1108 CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1109 $ LDWRKU )
1110 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1111 $ WORK( IU+1 ), LDWRKU )
1112 *
1113 * Generate Q in A
1114 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1115 *
1116 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1117 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1118 IE = ITAU
1119 ITAUQ = IE + N
1120 ITAUP = ITAUQ + N
1121 IWORK = ITAUP + N
1122 *
1123 * Bidiagonalize R in WORK(IU), copying result to
1124 * WORK(IR)
1125 * (Workspace: need 2*N*N+4*N,
1126 * prefer 2*N*N+3*N+2*N*NB)
1127 *
1128 CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1129 $ WORK( IE ), WORK( ITAUQ ),
1130 $ WORK( ITAUP ), WORK( IWORK ),
1131 $ LWORK-IWORK+1, IERR )
1132 CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1133 $ WORK( IR ), LDWRKR )
1134 *
1135 * Generate left bidiagonalizing vectors in WORK(IU)
1136 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1137 *
1138 CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1139 $ WORK( ITAUQ ), WORK( IWORK ),
1140 $ LWORK-IWORK+1, IERR )
1141 *
1142 * Generate right bidiagonalizing vectors in WORK(IR)
1143 * (Workspace: need 2*N*N+4*N-1,
1144 * prefer 2*N*N+3*N+(N-1)*NB)
1145 *
1146 CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1147 $ WORK( ITAUP ), WORK( IWORK ),
1148 $ LWORK-IWORK+1, IERR )
1149 IWORK = IE + N
1150 *
1151 * Perform bidiagonal QR iteration, computing left
1152 * singular vectors of R in WORK(IU) and computing
1153 * right singular vectors of R in WORK(IR)
1154 * (Workspace: need 2*N*N+BDSPAC)
1155 *
1156 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1157 $ WORK( IR ), LDWRKR, WORK( IU ),
1158 $ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1159 *
1160 * Multiply Q in A by left singular vectors of R in
1161 * WORK(IU), storing result in U
1162 * (Workspace: need N*N)
1163 *
1164 CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1165 $ WORK( IU ), LDWRKU, ZERO, U, LDU )
1166 *
1167 * Copy right singular vectors of R to A
1168 * (Workspace: need N*N)
1169 *
1170 CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1171 $ LDA )
1172 *
1173 ELSE
1174 *
1175 * Insufficient workspace for a fast algorithm
1176 *
1177 ITAU = 1
1178 IWORK = ITAU + N
1179 *
1180 * Compute A=Q*R, copying result to U
1181 * (Workspace: need 2*N, prefer N+N*NB)
1182 *
1183 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1184 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1185 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1186 *
1187 * Generate Q in U
1188 * (Workspace: need 2*N, prefer N+N*NB)
1189 *
1190 CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1191 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1192 IE = ITAU
1193 ITAUQ = IE + N
1194 ITAUP = ITAUQ + N
1195 IWORK = ITAUP + N
1196 *
1197 * Zero out below R in A
1198 *
1199 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1200 $ LDA )
1201 *
1202 * Bidiagonalize R in A
1203 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1204 *
1205 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1206 $ WORK( ITAUQ ), WORK( ITAUP ),
1207 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1208 *
1209 * Multiply Q in U by left vectors bidiagonalizing R
1210 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1211 *
1212 CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1213 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1214 $ LWORK-IWORK+1, IERR )
1215 *
1216 * Generate right vectors bidiagonalizing R in A
1217 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1218 *
1219 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1220 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1221 IWORK = IE + N
1222 *
1223 * Perform bidiagonal QR iteration, computing left
1224 * singular vectors of A in U and computing right
1225 * singular vectors of A in A
1226 * (Workspace: need BDSPAC)
1227 *
1228 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1229 $ LDA, U, LDU, DUM, 1, WORK( IWORK ),
1230 $ INFO )
1231 *
1232 END IF
1233 *
1234 ELSE IF( WNTVAS ) THEN
1235 *
1236 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1237 * or 'A')
1238 * N left singular vectors to be computed in U and
1239 * N right singular vectors to be computed in VT
1240 *
1241 IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1242 *
1243 * Sufficient workspace for a fast algorithm
1244 *
1245 IU = 1
1246 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1247 *
1248 * WORK(IU) is LDA by N
1249 *
1250 LDWRKU = LDA
1251 ELSE
1252 *
1253 * WORK(IU) is N by N
1254 *
1255 LDWRKU = N
1256 END IF
1257 ITAU = IU + LDWRKU*N
1258 IWORK = ITAU + N
1259 *
1260 * Compute A=Q*R
1261 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1262 *
1263 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1264 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1265 *
1266 * Copy R to WORK(IU), zeroing out below it
1267 *
1268 CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1269 $ LDWRKU )
1270 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1271 $ WORK( IU+1 ), LDWRKU )
1272 *
1273 * Generate Q in A
1274 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1275 *
1276 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1277 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1278 IE = ITAU
1279 ITAUQ = IE + N
1280 ITAUP = ITAUQ + N
1281 IWORK = ITAUP + N
1282 *
1283 * Bidiagonalize R in WORK(IU), copying result to VT
1284 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1285 *
1286 CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1287 $ WORK( IE ), WORK( ITAUQ ),
1288 $ WORK( ITAUP ), WORK( IWORK ),
1289 $ LWORK-IWORK+1, IERR )
1290 CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1291 $ LDVT )
1292 *
1293 * Generate left bidiagonalizing vectors in WORK(IU)
1294 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1295 *
1296 CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1297 $ WORK( ITAUQ ), WORK( IWORK ),
1298 $ LWORK-IWORK+1, IERR )
1299 *
1300 * Generate right bidiagonalizing vectors in VT
1301 * (Workspace: need N*N+4*N-1,
1302 * prefer N*N+3*N+(N-1)*NB)
1303 *
1304 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1305 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1306 IWORK = IE + N
1307 *
1308 * Perform bidiagonal QR iteration, computing left
1309 * singular vectors of R in WORK(IU) and computing
1310 * right singular vectors of R in VT
1311 * (Workspace: need N*N+BDSPAC)
1312 *
1313 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1314 $ LDVT, WORK( IU ), LDWRKU, DUM, 1,
1315 $ WORK( IWORK ), INFO )
1316 *
1317 * Multiply Q in A by left singular vectors of R in
1318 * WORK(IU), storing result in U
1319 * (Workspace: need N*N)
1320 *
1321 CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1322 $ WORK( IU ), LDWRKU, ZERO, U, LDU )
1323 *
1324 ELSE
1325 *
1326 * Insufficient workspace for a fast algorithm
1327 *
1328 ITAU = 1
1329 IWORK = ITAU + N
1330 *
1331 * Compute A=Q*R, copying result to U
1332 * (Workspace: need 2*N, prefer N+N*NB)
1333 *
1334 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1335 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1336 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1337 *
1338 * Generate Q in U
1339 * (Workspace: need 2*N, prefer N+N*NB)
1340 *
1341 CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1342 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1343 *
1344 * Copy R to VT, zeroing out below it
1345 *
1346 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1347 IF( N.GT.1 )
1348 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1349 $ VT( 2, 1 ), LDVT )
1350 IE = ITAU
1351 ITAUQ = IE + N
1352 ITAUP = ITAUQ + N
1353 IWORK = ITAUP + N
1354 *
1355 * Bidiagonalize R in VT
1356 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1357 *
1358 CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1359 $ WORK( ITAUQ ), WORK( ITAUP ),
1360 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1361 *
1362 * Multiply Q in U by left bidiagonalizing vectors
1363 * in VT
1364 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1365 *
1366 CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1367 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1368 $ LWORK-IWORK+1, IERR )
1369 *
1370 * Generate right bidiagonalizing vectors in VT
1371 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1372 *
1373 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1374 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1375 IWORK = IE + N
1376 *
1377 * Perform bidiagonal QR iteration, computing left
1378 * singular vectors of A in U and computing right
1379 * singular vectors of A in VT
1380 * (Workspace: need BDSPAC)
1381 *
1382 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1383 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1384 $ INFO )
1385 *
1386 END IF
1387 *
1388 END IF
1389 *
1390 ELSE IF( WNTUA ) THEN
1391 *
1392 IF( WNTVN ) THEN
1393 *
1394 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1395 * M left singular vectors to be computed in U and
1396 * no right singular vectors to be computed
1397 *
1398 IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1399 *
1400 * Sufficient workspace for a fast algorithm
1401 *
1402 IR = 1
1403 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1404 *
1405 * WORK(IR) is LDA by N
1406 *
1407 LDWRKR = LDA
1408 ELSE
1409 *
1410 * WORK(IR) is N by N
1411 *
1412 LDWRKR = N
1413 END IF
1414 ITAU = IR + LDWRKR*N
1415 IWORK = ITAU + N
1416 *
1417 * Compute A=Q*R, copying result to U
1418 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1419 *
1420 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1421 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1422 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1423 *
1424 * Copy R to WORK(IR), zeroing out below it
1425 *
1426 CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1427 $ LDWRKR )
1428 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1429 $ WORK( IR+1 ), LDWRKR )
1430 *
1431 * Generate Q in U
1432 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1433 *
1434 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1435 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1436 IE = ITAU
1437 ITAUQ = IE + N
1438 ITAUP = ITAUQ + N
1439 IWORK = ITAUP + N
1440 *
1441 * Bidiagonalize R in WORK(IR)
1442 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1443 *
1444 CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1445 $ WORK( IE ), WORK( ITAUQ ),
1446 $ WORK( ITAUP ), WORK( IWORK ),
1447 $ LWORK-IWORK+1, IERR )
1448 *
1449 * Generate left bidiagonalizing vectors in WORK(IR)
1450 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1451 *
1452 CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1453 $ WORK( ITAUQ ), WORK( IWORK ),
1454 $ LWORK-IWORK+1, IERR )
1455 IWORK = IE + N
1456 *
1457 * Perform bidiagonal QR iteration, computing left
1458 * singular vectors of R in WORK(IR)
1459 * (Workspace: need N*N+BDSPAC)
1460 *
1461 CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1462 $ 1, WORK( IR ), LDWRKR, DUM, 1,
1463 $ WORK( IWORK ), INFO )
1464 *
1465 * Multiply Q in U by left singular vectors of R in
1466 * WORK(IR), storing result in A
1467 * (Workspace: need N*N)
1468 *
1469 CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1470 $ WORK( IR ), LDWRKR, ZERO, A, LDA )
1471 *
1472 * Copy left singular vectors of A from A to U
1473 *
1474 CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1475 *
1476 ELSE
1477 *
1478 * Insufficient workspace for a fast algorithm
1479 *
1480 ITAU = 1
1481 IWORK = ITAU + N
1482 *
1483 * Compute A=Q*R, copying result to U
1484 * (Workspace: need 2*N, prefer N+N*NB)
1485 *
1486 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1487 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1488 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1489 *
1490 * Generate Q in U
1491 * (Workspace: need N+M, prefer N+M*NB)
1492 *
1493 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1494 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1495 IE = ITAU
1496 ITAUQ = IE + N
1497 ITAUP = ITAUQ + N
1498 IWORK = ITAUP + N
1499 *
1500 * Zero out below R in A
1501 *
1502 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1503 $ LDA )
1504 *
1505 * Bidiagonalize R in A
1506 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1507 *
1508 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1509 $ WORK( ITAUQ ), WORK( ITAUP ),
1510 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1511 *
1512 * Multiply Q in U by left bidiagonalizing vectors
1513 * in A
1514 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1515 *
1516 CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1517 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1518 $ LWORK-IWORK+1, IERR )
1519 IWORK = IE + N
1520 *
1521 * Perform bidiagonal QR iteration, computing left
1522 * singular vectors of A in U
1523 * (Workspace: need BDSPAC)
1524 *
1525 CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1526 $ 1, U, LDU, DUM, 1, WORK( IWORK ),
1527 $ INFO )
1528 *
1529 END IF
1530 *
1531 ELSE IF( WNTVO ) THEN
1532 *
1533 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1534 * M left singular vectors to be computed in U and
1535 * N right singular vectors to be overwritten on A
1536 *
1537 IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1538 *
1539 * Sufficient workspace for a fast algorithm
1540 *
1541 IU = 1
1542 IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1543 *
1544 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1545 *
1546 LDWRKU = LDA
1547 IR = IU + LDWRKU*N
1548 LDWRKR = LDA
1549 ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1550 *
1551 * WORK(IU) is LDA by N and WORK(IR) is N by N
1552 *
1553 LDWRKU = LDA
1554 IR = IU + LDWRKU*N
1555 LDWRKR = N
1556 ELSE
1557 *
1558 * WORK(IU) is N by N and WORK(IR) is N by N
1559 *
1560 LDWRKU = N
1561 IR = IU + LDWRKU*N
1562 LDWRKR = N
1563 END IF
1564 ITAU = IR + LDWRKR*N
1565 IWORK = ITAU + N
1566 *
1567 * Compute A=Q*R, copying result to U
1568 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1569 *
1570 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1571 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1572 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1573 *
1574 * Generate Q in U
1575 * (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1576 *
1577 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1578 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1579 *
1580 * Copy R to WORK(IU), zeroing out below it
1581 *
1582 CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1583 $ LDWRKU )
1584 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1585 $ WORK( IU+1 ), LDWRKU )
1586 IE = ITAU
1587 ITAUQ = IE + N
1588 ITAUP = ITAUQ + N
1589 IWORK = ITAUP + N
1590 *
1591 * Bidiagonalize R in WORK(IU), copying result to
1592 * WORK(IR)
1593 * (Workspace: need 2*N*N+4*N,
1594 * prefer 2*N*N+3*N+2*N*NB)
1595 *
1596 CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1597 $ WORK( IE ), WORK( ITAUQ ),
1598 $ WORK( ITAUP ), WORK( IWORK ),
1599 $ LWORK-IWORK+1, IERR )
1600 CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1601 $ WORK( IR ), LDWRKR )
1602 *
1603 * Generate left bidiagonalizing vectors in WORK(IU)
1604 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1605 *
1606 CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1607 $ WORK( ITAUQ ), WORK( IWORK ),
1608 $ LWORK-IWORK+1, IERR )
1609 *
1610 * Generate right bidiagonalizing vectors in WORK(IR)
1611 * (Workspace: need 2*N*N+4*N-1,
1612 * prefer 2*N*N+3*N+(N-1)*NB)
1613 *
1614 CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1615 $ WORK( ITAUP ), WORK( IWORK ),
1616 $ LWORK-IWORK+1, IERR )
1617 IWORK = IE + N
1618 *
1619 * Perform bidiagonal QR iteration, computing left
1620 * singular vectors of R in WORK(IU) and computing
1621 * right singular vectors of R in WORK(IR)
1622 * (Workspace: need 2*N*N+BDSPAC)
1623 *
1624 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1625 $ WORK( IR ), LDWRKR, WORK( IU ),
1626 $ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1627 *
1628 * Multiply Q in U by left singular vectors of R in
1629 * WORK(IU), storing result in A
1630 * (Workspace: need N*N)
1631 *
1632 CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1633 $ WORK( IU ), LDWRKU, ZERO, A, LDA )
1634 *
1635 * Copy left singular vectors of A from A to U
1636 *
1637 CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1638 *
1639 * Copy right singular vectors of R from WORK(IR) to A
1640 *
1641 CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1642 $ LDA )
1643 *
1644 ELSE
1645 *
1646 * Insufficient workspace for a fast algorithm
1647 *
1648 ITAU = 1
1649 IWORK = ITAU + N
1650 *
1651 * Compute A=Q*R, copying result to U
1652 * (Workspace: need 2*N, prefer N+N*NB)
1653 *
1654 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1655 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1656 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1657 *
1658 * Generate Q in U
1659 * (Workspace: need N+M, prefer N+M*NB)
1660 *
1661 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1662 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1663 IE = ITAU
1664 ITAUQ = IE + N
1665 ITAUP = ITAUQ + N
1666 IWORK = ITAUP + N
1667 *
1668 * Zero out below R in A
1669 *
1670 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1671 $ LDA )
1672 *
1673 * Bidiagonalize R in A
1674 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1675 *
1676 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1677 $ WORK( ITAUQ ), WORK( ITAUP ),
1678 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1679 *
1680 * Multiply Q in U by left bidiagonalizing vectors
1681 * in A
1682 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1683 *
1684 CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1685 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1686 $ LWORK-IWORK+1, IERR )
1687 *
1688 * Generate right bidiagonalizing vectors in A
1689 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1690 *
1691 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1692 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1693 IWORK = IE + N
1694 *
1695 * Perform bidiagonal QR iteration, computing left
1696 * singular vectors of A in U and computing right
1697 * singular vectors of A in A
1698 * (Workspace: need BDSPAC)
1699 *
1700 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1701 $ LDA, U, LDU, DUM, 1, WORK( IWORK ),
1702 $ INFO )
1703 *
1704 END IF
1705 *
1706 ELSE IF( WNTVAS ) THEN
1707 *
1708 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1709 * or 'A')
1710 * M left singular vectors to be computed in U and
1711 * N right singular vectors to be computed in VT
1712 *
1713 IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1714 *
1715 * Sufficient workspace for a fast algorithm
1716 *
1717 IU = 1
1718 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1719 *
1720 * WORK(IU) is LDA by N
1721 *
1722 LDWRKU = LDA
1723 ELSE
1724 *
1725 * WORK(IU) is N by N
1726 *
1727 LDWRKU = N
1728 END IF
1729 ITAU = IU + LDWRKU*N
1730 IWORK = ITAU + N
1731 *
1732 * Compute A=Q*R, copying result to U
1733 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1734 *
1735 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1736 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1737 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1738 *
1739 * Generate Q in U
1740 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1741 *
1742 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1743 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1744 *
1745 * Copy R to WORK(IU), zeroing out below it
1746 *
1747 CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1748 $ LDWRKU )
1749 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1750 $ WORK( IU+1 ), LDWRKU )
1751 IE = ITAU
1752 ITAUQ = IE + N
1753 ITAUP = ITAUQ + N
1754 IWORK = ITAUP + N
1755 *
1756 * Bidiagonalize R in WORK(IU), copying result to VT
1757 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1758 *
1759 CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1760 $ WORK( IE ), WORK( ITAUQ ),
1761 $ WORK( ITAUP ), WORK( IWORK ),
1762 $ LWORK-IWORK+1, IERR )
1763 CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1764 $ LDVT )
1765 *
1766 * Generate left bidiagonalizing vectors in WORK(IU)
1767 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1768 *
1769 CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1770 $ WORK( ITAUQ ), WORK( IWORK ),
1771 $ LWORK-IWORK+1, IERR )
1772 *
1773 * Generate right bidiagonalizing vectors in VT
1774 * (Workspace: need N*N+4*N-1,
1775 * prefer N*N+3*N+(N-1)*NB)
1776 *
1777 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1778 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1779 IWORK = IE + N
1780 *
1781 * Perform bidiagonal QR iteration, computing left
1782 * singular vectors of R in WORK(IU) and computing
1783 * right singular vectors of R in VT
1784 * (Workspace: need N*N+BDSPAC)
1785 *
1786 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1787 $ LDVT, WORK( IU ), LDWRKU, DUM, 1,
1788 $ WORK( IWORK ), INFO )
1789 *
1790 * Multiply Q in U by left singular vectors of R in
1791 * WORK(IU), storing result in A
1792 * (Workspace: need N*N)
1793 *
1794 CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1795 $ WORK( IU ), LDWRKU, ZERO, A, LDA )
1796 *
1797 * Copy left singular vectors of A from A to U
1798 *
1799 CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1800 *
1801 ELSE
1802 *
1803 * Insufficient workspace for a fast algorithm
1804 *
1805 ITAU = 1
1806 IWORK = ITAU + N
1807 *
1808 * Compute A=Q*R, copying result to U
1809 * (Workspace: need 2*N, prefer N+N*NB)
1810 *
1811 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1812 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1813 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1814 *
1815 * Generate Q in U
1816 * (Workspace: need N+M, prefer N+M*NB)
1817 *
1818 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1819 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1820 *
1821 * Copy R from A to VT, zeroing out below it
1822 *
1823 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1824 IF( N.GT.1 )
1825 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1826 $ VT( 2, 1 ), LDVT )
1827 IE = ITAU
1828 ITAUQ = IE + N
1829 ITAUP = ITAUQ + N
1830 IWORK = ITAUP + N
1831 *
1832 * Bidiagonalize R in VT
1833 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1834 *
1835 CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1836 $ WORK( ITAUQ ), WORK( ITAUP ),
1837 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1838 *
1839 * Multiply Q in U by left bidiagonalizing vectors
1840 * in VT
1841 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1842 *
1843 CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1844 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1845 $ LWORK-IWORK+1, IERR )
1846 *
1847 * Generate right bidiagonalizing vectors in VT
1848 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1849 *
1850 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1851 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1852 IWORK = IE + N
1853 *
1854 * Perform bidiagonal QR iteration, computing left
1855 * singular vectors of A in U and computing right
1856 * singular vectors of A in VT
1857 * (Workspace: need BDSPAC)
1858 *
1859 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1860 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1861 $ INFO )
1862 *
1863 END IF
1864 *
1865 END IF
1866 *
1867 END IF
1868 *
1869 ELSE
1870 *
1871 * M .LT. MNTHR
1872 *
1873 * Path 10 (M at least N, but not much larger)
1874 * Reduce to bidiagonal form without QR decomposition
1875 *
1876 IE = 1
1877 ITAUQ = IE + N
1878 ITAUP = ITAUQ + N
1879 IWORK = ITAUP + N
1880 *
1881 * Bidiagonalize A
1882 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1883 *
1884 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1885 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1886 $ IERR )
1887 IF( WNTUAS ) THEN
1888 *
1889 * If left singular vectors desired in U, copy result to U
1890 * and generate left bidiagonalizing vectors in U
1891 * (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1892 *
1893 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1894 IF( WNTUS )
1895 $ NCU = N
1896 IF( WNTUA )
1897 $ NCU = M
1898 CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1899 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1900 END IF
1901 IF( WNTVAS ) THEN
1902 *
1903 * If right singular vectors desired in VT, copy result to
1904 * VT and generate right bidiagonalizing vectors in VT
1905 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1906 *
1907 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1908 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1909 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1910 END IF
1911 IF( WNTUO ) THEN
1912 *
1913 * If left singular vectors desired in A, generate left
1914 * bidiagonalizing vectors in A
1915 * (Workspace: need 4*N, prefer 3*N+N*NB)
1916 *
1917 CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
1918 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1919 END IF
1920 IF( WNTVO ) THEN
1921 *
1922 * If right singular vectors desired in A, generate right
1923 * bidiagonalizing vectors in A
1924 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1925 *
1926 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1927 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1928 END IF
1929 IWORK = IE + N
1930 IF( WNTUAS .OR. WNTUO )
1931 $ NRU = M
1932 IF( WNTUN )
1933 $ NRU = 0
1934 IF( WNTVAS .OR. WNTVO )
1935 $ NCVT = N
1936 IF( WNTVN )
1937 $ NCVT = 0
1938 IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
1939 *
1940 * Perform bidiagonal QR iteration, if desired, computing
1941 * left singular vectors in U and computing right singular
1942 * vectors in VT
1943 * (Workspace: need BDSPAC)
1944 *
1945 CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1946 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
1947 ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
1948 *
1949 * Perform bidiagonal QR iteration, if desired, computing
1950 * left singular vectors in U and computing right singular
1951 * vectors in A
1952 * (Workspace: need BDSPAC)
1953 *
1954 CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
1955 $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
1956 ELSE
1957 *
1958 * Perform bidiagonal QR iteration, if desired, computing
1959 * left singular vectors in A and computing right singular
1960 * vectors in VT
1961 * (Workspace: need BDSPAC)
1962 *
1963 CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1964 $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
1965 END IF
1966 *
1967 END IF
1968 *
1969 ELSE
1970 *
1971 * A has more columns than rows. If A has sufficiently more
1972 * columns than rows, first reduce using the LQ decomposition (if
1973 * sufficient workspace available)
1974 *
1975 IF( N.GE.MNTHR ) THEN
1976 *
1977 IF( WNTVN ) THEN
1978 *
1979 * Path 1t(N much larger than M, JOBVT='N')
1980 * No right singular vectors to be computed
1981 *
1982 ITAU = 1
1983 IWORK = ITAU + M
1984 *
1985 * Compute A=L*Q
1986 * (Workspace: need 2*M, prefer M+M*NB)
1987 *
1988 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
1989 $ LWORK-IWORK+1, IERR )
1990 *
1991 * Zero out above L
1992 *
1993 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
1994 IE = 1
1995 ITAUQ = IE + M
1996 ITAUP = ITAUQ + M
1997 IWORK = ITAUP + M
1998 *
1999 * Bidiagonalize L in A
2000 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2001 *
2002 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
2003 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2004 $ IERR )
2005 IF( WNTUO .OR. WNTUAS ) THEN
2006 *
2007 * If left singular vectors desired, generate Q
2008 * (Workspace: need 4*M, prefer 3*M+M*NB)
2009 *
2010 CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2011 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2012 END IF
2013 IWORK = IE + M
2014 NRU = 0
2015 IF( WNTUO .OR. WNTUAS )
2016 $ NRU = M
2017 *
2018 * Perform bidiagonal QR iteration, computing left singular
2019 * vectors of A in A if desired
2020 * (Workspace: need BDSPAC)
2021 *
2022 CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2023 $ LDA, DUM, 1, WORK( IWORK ), INFO )
2024 *
2025 * If left singular vectors desired in U, copy them there
2026 *
2027 IF( WNTUAS )
2028 $ CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
2029 *
2030 ELSE IF( WNTVO .AND. WNTUN ) THEN
2031 *
2032 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2033 * M right singular vectors to be overwritten on A and
2034 * no left singular vectors to be computed
2035 *
2036 IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2037 *
2038 * Sufficient workspace for a fast algorithm
2039 *
2040 IR = 1
2041 IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2042 *
2043 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2044 *
2045 LDWRKU = LDA
2046 CHUNK = N
2047 LDWRKR = LDA
2048 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2049 *
2050 * WORK(IU) is LDA by N and WORK(IR) is M by M
2051 *
2052 LDWRKU = LDA
2053 CHUNK = N
2054 LDWRKR = M
2055 ELSE
2056 *
2057 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2058 *
2059 LDWRKU = M
2060 CHUNK = ( LWORK-M*M-M ) / M
2061 LDWRKR = M
2062 END IF
2063 ITAU = IR + LDWRKR*M
2064 IWORK = ITAU + M
2065 *
2066 * Compute A=L*Q
2067 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2068 *
2069 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2070 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2071 *
2072 * Copy L to WORK(IR) and zero out above it
2073 *
2074 CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2075 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2076 $ WORK( IR+LDWRKR ), LDWRKR )
2077 *
2078 * Generate Q in A
2079 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2080 *
2081 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2082 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2083 IE = ITAU
2084 ITAUQ = IE + M
2085 ITAUP = ITAUQ + M
2086 IWORK = ITAUP + M
2087 *
2088 * Bidiagonalize L in WORK(IR)
2089 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2090 *
2091 CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2092 $ WORK( ITAUQ ), WORK( ITAUP ),
2093 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2094 *
2095 * Generate right vectors bidiagonalizing L
2096 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2097 *
2098 CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2099 $ WORK( ITAUP ), WORK( IWORK ),
2100 $ LWORK-IWORK+1, IERR )
2101 IWORK = IE + M
2102 *
2103 * Perform bidiagonal QR iteration, computing right
2104 * singular vectors of L in WORK(IR)
2105 * (Workspace: need M*M+BDSPAC)
2106 *
2107 CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2108 $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2109 $ WORK( IWORK ), INFO )
2110 IU = IE + M
2111 *
2112 * Multiply right singular vectors of L in WORK(IR) by Q
2113 * in A, storing result in WORK(IU) and copying to A
2114 * (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2115 *
2116 DO 30 I = 1, N, CHUNK
2117 BLK = MIN( N-I+1, CHUNK )
2118 CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2119 $ LDWRKR, A( 1, I ), LDA, ZERO,
2120 $ WORK( IU ), LDWRKU )
2121 CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2122 $ A( 1, I ), LDA )
2123 30 CONTINUE
2124 *
2125 ELSE
2126 *
2127 * Insufficient workspace for a fast algorithm
2128 *
2129 IE = 1
2130 ITAUQ = IE + M
2131 ITAUP = ITAUQ + M
2132 IWORK = ITAUP + M
2133 *
2134 * Bidiagonalize A
2135 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2136 *
2137 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
2138 $ WORK( ITAUQ ), WORK( ITAUP ),
2139 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2140 *
2141 * Generate right vectors bidiagonalizing A
2142 * (Workspace: need 4*M, prefer 3*M+M*NB)
2143 *
2144 CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2145 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2146 IWORK = IE + M
2147 *
2148 * Perform bidiagonal QR iteration, computing right
2149 * singular vectors of A in A
2150 * (Workspace: need BDSPAC)
2151 *
2152 CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
2153 $ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2154 *
2155 END IF
2156 *
2157 ELSE IF( WNTVO .AND. WNTUAS ) THEN
2158 *
2159 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2160 * M right singular vectors to be overwritten on A and
2161 * M left singular vectors to be computed in U
2162 *
2163 IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2164 *
2165 * Sufficient workspace for a fast algorithm
2166 *
2167 IR = 1
2168 IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2169 *
2170 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2171 *
2172 LDWRKU = LDA
2173 CHUNK = N
2174 LDWRKR = LDA
2175 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2176 *
2177 * WORK(IU) is LDA by N and WORK(IR) is M by M
2178 *
2179 LDWRKU = LDA
2180 CHUNK = N
2181 LDWRKR = M
2182 ELSE
2183 *
2184 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2185 *
2186 LDWRKU = M
2187 CHUNK = ( LWORK-M*M-M ) / M
2188 LDWRKR = M
2189 END IF
2190 ITAU = IR + LDWRKR*M
2191 IWORK = ITAU + M
2192 *
2193 * Compute A=L*Q
2194 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2195 *
2196 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2197 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2198 *
2199 * Copy L to U, zeroing about above it
2200 *
2201 CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2202 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2203 $ LDU )
2204 *
2205 * Generate Q in A
2206 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2207 *
2208 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2209 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2210 IE = ITAU
2211 ITAUQ = IE + M
2212 ITAUP = ITAUQ + M
2213 IWORK = ITAUP + M
2214 *
2215 * Bidiagonalize L in U, copying result to WORK(IR)
2216 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2217 *
2218 CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2219 $ WORK( ITAUQ ), WORK( ITAUP ),
2220 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2221 CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2222 *
2223 * Generate right vectors bidiagonalizing L in WORK(IR)
2224 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2225 *
2226 CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2227 $ WORK( ITAUP ), WORK( IWORK ),
2228 $ LWORK-IWORK+1, IERR )
2229 *
2230 * Generate left vectors bidiagonalizing L in U
2231 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2232 *
2233 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2234 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2235 IWORK = IE + M
2236 *
2237 * Perform bidiagonal QR iteration, computing left
2238 * singular vectors of L in U, and computing right
2239 * singular vectors of L in WORK(IR)
2240 * (Workspace: need M*M+BDSPAC)
2241 *
2242 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2243 $ WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2244 $ WORK( IWORK ), INFO )
2245 IU = IE + M
2246 *
2247 * Multiply right singular vectors of L in WORK(IR) by Q
2248 * in A, storing result in WORK(IU) and copying to A
2249 * (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2250 *
2251 DO 40 I = 1, N, CHUNK
2252 BLK = MIN( N-I+1, CHUNK )
2253 CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2254 $ LDWRKR, A( 1, I ), LDA, ZERO,
2255 $ WORK( IU ), LDWRKU )
2256 CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2257 $ A( 1, I ), LDA )
2258 40 CONTINUE
2259 *
2260 ELSE
2261 *
2262 * Insufficient workspace for a fast algorithm
2263 *
2264 ITAU = 1
2265 IWORK = ITAU + M
2266 *
2267 * Compute A=L*Q
2268 * (Workspace: need 2*M, prefer M+M*NB)
2269 *
2270 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2271 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2272 *
2273 * Copy L to U, zeroing out above it
2274 *
2275 CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2276 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2277 $ LDU )
2278 *
2279 * Generate Q in A
2280 * (Workspace: need 2*M, prefer M+M*NB)
2281 *
2282 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2283 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2284 IE = ITAU
2285 ITAUQ = IE + M
2286 ITAUP = ITAUQ + M
2287 IWORK = ITAUP + M
2288 *
2289 * Bidiagonalize L in U
2290 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2291 *
2292 CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2293 $ WORK( ITAUQ ), WORK( ITAUP ),
2294 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2295 *
2296 * Multiply right vectors bidiagonalizing L by Q in A
2297 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2298 *
2299 CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2300 $ WORK( ITAUP ), A, LDA, WORK( IWORK ),
2301 $ LWORK-IWORK+1, IERR )
2302 *
2303 * Generate left vectors bidiagonalizing L in U
2304 * (Workspace: need 4*M, prefer 3*M+M*NB)
2305 *
2306 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2307 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2308 IWORK = IE + M
2309 *
2310 * Perform bidiagonal QR iteration, computing left
2311 * singular vectors of A in U and computing right
2312 * singular vectors of A in A
2313 * (Workspace: need BDSPAC)
2314 *
2315 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2316 $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
2317 *
2318 END IF
2319 *
2320 ELSE IF( WNTVS ) THEN
2321 *
2322 IF( WNTUN ) THEN
2323 *
2324 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2325 * M right singular vectors to be computed in VT and
2326 * no left singular vectors to be computed
2327 *
2328 IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2329 *
2330 * Sufficient workspace for a fast algorithm
2331 *
2332 IR = 1
2333 IF( LWORK.GE.WRKBL+LDA*M ) THEN
2334 *
2335 * WORK(IR) is LDA by M
2336 *
2337 LDWRKR = LDA
2338 ELSE
2339 *
2340 * WORK(IR) is M by M
2341 *
2342 LDWRKR = M
2343 END IF
2344 ITAU = IR + LDWRKR*M
2345 IWORK = ITAU + M
2346 *
2347 * Compute A=L*Q
2348 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2349 *
2350 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2351 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2352 *
2353 * Copy L to WORK(IR), zeroing out above it
2354 *
2355 CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2356 $ LDWRKR )
2357 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2358 $ WORK( IR+LDWRKR ), LDWRKR )
2359 *
2360 * Generate Q in A
2361 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2362 *
2363 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2364 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2365 IE = ITAU
2366 ITAUQ = IE + M
2367 ITAUP = ITAUQ + M
2368 IWORK = ITAUP + M
2369 *
2370 * Bidiagonalize L in WORK(IR)
2371 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2372 *
2373 CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2374 $ WORK( IE ), WORK( ITAUQ ),
2375 $ WORK( ITAUP ), WORK( IWORK ),
2376 $ LWORK-IWORK+1, IERR )
2377 *
2378 * Generate right vectors bidiagonalizing L in
2379 * WORK(IR)
2380 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2381 *
2382 CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2383 $ WORK( ITAUP ), WORK( IWORK ),
2384 $ LWORK-IWORK+1, IERR )
2385 IWORK = IE + M
2386 *
2387 * Perform bidiagonal QR iteration, computing right
2388 * singular vectors of L in WORK(IR)
2389 * (Workspace: need M*M+BDSPAC)
2390 *
2391 CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2392 $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2393 $ WORK( IWORK ), INFO )
2394 *
2395 * Multiply right singular vectors of L in WORK(IR) by
2396 * Q in A, storing result in VT
2397 * (Workspace: need M*M)
2398 *
2399 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2400 $ LDWRKR, A, LDA, ZERO, VT, LDVT )
2401 *
2402 ELSE
2403 *
2404 * Insufficient workspace for a fast algorithm
2405 *
2406 ITAU = 1
2407 IWORK = ITAU + M
2408 *
2409 * Compute A=L*Q
2410 * (Workspace: need 2*M, prefer M+M*NB)
2411 *
2412 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2413 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2414 *
2415 * Copy result to VT
2416 *
2417 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2418 *
2419 * Generate Q in VT
2420 * (Workspace: need 2*M, prefer M+M*NB)
2421 *
2422 CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2423 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2424 IE = ITAU
2425 ITAUQ = IE + M
2426 ITAUP = ITAUQ + M
2427 IWORK = ITAUP + M
2428 *
2429 * Zero out above L in A
2430 *
2431 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2432 $ LDA )
2433 *
2434 * Bidiagonalize L in A
2435 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2436 *
2437 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2438 $ WORK( ITAUQ ), WORK( ITAUP ),
2439 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2440 *
2441 * Multiply right vectors bidiagonalizing L by Q in VT
2442 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2443 *
2444 CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2445 $ WORK( ITAUP ), VT, LDVT,
2446 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2447 IWORK = IE + M
2448 *
2449 * Perform bidiagonal QR iteration, computing right
2450 * singular vectors of A in VT
2451 * (Workspace: need BDSPAC)
2452 *
2453 CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2454 $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2455 $ INFO )
2456 *
2457 END IF
2458 *
2459 ELSE IF( WNTUO ) THEN
2460 *
2461 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2462 * M right singular vectors to be computed in VT and
2463 * M left singular vectors to be overwritten on A
2464 *
2465 IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
2466 *
2467 * Sufficient workspace for a fast algorithm
2468 *
2469 IU = 1
2470 IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2471 *
2472 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2473 *
2474 LDWRKU = LDA
2475 IR = IU + LDWRKU*M
2476 LDWRKR = LDA
2477 ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2478 *
2479 * WORK(IU) is LDA by M and WORK(IR) is M by M
2480 *
2481 LDWRKU = LDA
2482 IR = IU + LDWRKU*M
2483 LDWRKR = M
2484 ELSE
2485 *
2486 * WORK(IU) is M by M and WORK(IR) is M by M
2487 *
2488 LDWRKU = M
2489 IR = IU + LDWRKU*M
2490 LDWRKR = M
2491 END IF
2492 ITAU = IR + LDWRKR*M
2493 IWORK = ITAU + M
2494 *
2495 * Compute A=L*Q
2496 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2497 *
2498 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2499 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2500 *
2501 * Copy L to WORK(IU), zeroing out below it
2502 *
2503 CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2504 $ LDWRKU )
2505 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2506 $ WORK( IU+LDWRKU ), LDWRKU )
2507 *
2508 * Generate Q in A
2509 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2510 *
2511 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2512 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2513 IE = ITAU
2514 ITAUQ = IE + M
2515 ITAUP = ITAUQ + M
2516 IWORK = ITAUP + M
2517 *
2518 * Bidiagonalize L in WORK(IU), copying result to
2519 * WORK(IR)
2520 * (Workspace: need 2*M*M+4*M,
2521 * prefer 2*M*M+3*M+2*M*NB)
2522 *
2523 CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2524 $ WORK( IE ), WORK( ITAUQ ),
2525 $ WORK( ITAUP ), WORK( IWORK ),
2526 $ LWORK-IWORK+1, IERR )
2527 CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2528 $ WORK( IR ), LDWRKR )
2529 *
2530 * Generate right bidiagonalizing vectors in WORK(IU)
2531 * (Workspace: need 2*M*M+4*M-1,
2532 * prefer 2*M*M+3*M+(M-1)*NB)
2533 *
2534 CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2535 $ WORK( ITAUP ), WORK( IWORK ),
2536 $ LWORK-IWORK+1, IERR )
2537 *
2538 * Generate left bidiagonalizing vectors in WORK(IR)
2539 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2540 *
2541 CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2542 $ WORK( ITAUQ ), WORK( IWORK ),
2543 $ LWORK-IWORK+1, IERR )
2544 IWORK = IE + M
2545 *
2546 * Perform bidiagonal QR iteration, computing left
2547 * singular vectors of L in WORK(IR) and computing
2548 * right singular vectors of L in WORK(IU)
2549 * (Workspace: need 2*M*M+BDSPAC)
2550 *
2551 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2552 $ WORK( IU ), LDWRKU, WORK( IR ),
2553 $ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2554 *
2555 * Multiply right singular vectors of L in WORK(IU) by
2556 * Q in A, storing result in VT
2557 * (Workspace: need M*M)
2558 *
2559 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2560 $ LDWRKU, A, LDA, ZERO, VT, LDVT )
2561 *
2562 * Copy left singular vectors of L to A
2563 * (Workspace: need M*M)
2564 *
2565 CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2566 $ LDA )
2567 *
2568 ELSE
2569 *
2570 * Insufficient workspace for a fast algorithm
2571 *
2572 ITAU = 1
2573 IWORK = ITAU + M
2574 *
2575 * Compute A=L*Q, copying result to VT
2576 * (Workspace: need 2*M, prefer M+M*NB)
2577 *
2578 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2579 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2580 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2581 *
2582 * Generate Q in VT
2583 * (Workspace: need 2*M, prefer M+M*NB)
2584 *
2585 CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2586 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2587 IE = ITAU
2588 ITAUQ = IE + M
2589 ITAUP = ITAUQ + M
2590 IWORK = ITAUP + M
2591 *
2592 * Zero out above L in A
2593 *
2594 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2595 $ LDA )
2596 *
2597 * Bidiagonalize L in A
2598 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2599 *
2600 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2601 $ WORK( ITAUQ ), WORK( ITAUP ),
2602 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2603 *
2604 * Multiply right vectors bidiagonalizing L by Q in VT
2605 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2606 *
2607 CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2608 $ WORK( ITAUP ), VT, LDVT,
2609 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2610 *
2611 * Generate left bidiagonalizing vectors of L in A
2612 * (Workspace: need 4*M, prefer 3*M+M*NB)
2613 *
2614 CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2615 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2616 IWORK = IE + M
2617 *
2618 * Perform bidiagonal QR iteration, compute left
2619 * singular vectors of A in A and compute right
2620 * singular vectors of A in VT
2621 * (Workspace: need BDSPAC)
2622 *
2623 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2624 $ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2625 $ INFO )
2626 *
2627 END IF
2628 *
2629 ELSE IF( WNTUAS ) THEN
2630 *
2631 * Path 6t(N much larger than M, JOBU='S' or 'A',
2632 * JOBVT='S')
2633 * M right singular vectors to be computed in VT and
2634 * M left singular vectors to be computed in U
2635 *
2636 IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2637 *
2638 * Sufficient workspace for a fast algorithm
2639 *
2640 IU = 1
2641 IF( LWORK.GE.WRKBL+LDA*M ) THEN
2642 *
2643 * WORK(IU) is LDA by N
2644 *
2645 LDWRKU = LDA
2646 ELSE
2647 *
2648 * WORK(IU) is LDA by M
2649 *
2650 LDWRKU = M
2651 END IF
2652 ITAU = IU + LDWRKU*M
2653 IWORK = ITAU + M
2654 *
2655 * Compute A=L*Q
2656 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2657 *
2658 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2659 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2660 *
2661 * Copy L to WORK(IU), zeroing out above it
2662 *
2663 CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2664 $ LDWRKU )
2665 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2666 $ WORK( IU+LDWRKU ), LDWRKU )
2667 *
2668 * Generate Q in A
2669 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2670 *
2671 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2672 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2673 IE = ITAU
2674 ITAUQ = IE + M
2675 ITAUP = ITAUQ + M
2676 IWORK = ITAUP + M
2677 *
2678 * Bidiagonalize L in WORK(IU), copying result to U
2679 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2680 *
2681 CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2682 $ WORK( IE ), WORK( ITAUQ ),
2683 $ WORK( ITAUP ), WORK( IWORK ),
2684 $ LWORK-IWORK+1, IERR )
2685 CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2686 $ LDU )
2687 *
2688 * Generate right bidiagonalizing vectors in WORK(IU)
2689 * (Workspace: need M*M+4*M-1,
2690 * prefer M*M+3*M+(M-1)*NB)
2691 *
2692 CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2693 $ WORK( ITAUP ), WORK( IWORK ),
2694 $ LWORK-IWORK+1, IERR )
2695 *
2696 * Generate left bidiagonalizing vectors in U
2697 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2698 *
2699 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2700 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2701 IWORK = IE + M
2702 *
2703 * Perform bidiagonal QR iteration, computing left
2704 * singular vectors of L in U and computing right
2705 * singular vectors of L in WORK(IU)
2706 * (Workspace: need M*M+BDSPAC)
2707 *
2708 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2709 $ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2710 $ WORK( IWORK ), INFO )
2711 *
2712 * Multiply right singular vectors of L in WORK(IU) by
2713 * Q in A, storing result in VT
2714 * (Workspace: need M*M)
2715 *
2716 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2717 $ LDWRKU, A, LDA, ZERO, VT, LDVT )
2718 *
2719 ELSE
2720 *
2721 * Insufficient workspace for a fast algorithm
2722 *
2723 ITAU = 1
2724 IWORK = ITAU + M
2725 *
2726 * Compute A=L*Q, copying result to VT
2727 * (Workspace: need 2*M, prefer M+M*NB)
2728 *
2729 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2730 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2731 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2732 *
2733 * Generate Q in VT
2734 * (Workspace: need 2*M, prefer M+M*NB)
2735 *
2736 CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2737 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2738 *
2739 * Copy L to U, zeroing out above it
2740 *
2741 CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2742 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2743 $ LDU )
2744 IE = ITAU
2745 ITAUQ = IE + M
2746 ITAUP = ITAUQ + M
2747 IWORK = ITAUP + M
2748 *
2749 * Bidiagonalize L in U
2750 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2751 *
2752 CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2753 $ WORK( ITAUQ ), WORK( ITAUP ),
2754 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2755 *
2756 * Multiply right bidiagonalizing vectors in U by Q
2757 * in VT
2758 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2759 *
2760 CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2761 $ WORK( ITAUP ), VT, LDVT,
2762 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2763 *
2764 * Generate left bidiagonalizing vectors in U
2765 * (Workspace: need 4*M, prefer 3*M+M*NB)
2766 *
2767 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2768 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2769 IWORK = IE + M
2770 *
2771 * Perform bidiagonal QR iteration, computing left
2772 * singular vectors of A in U and computing right
2773 * singular vectors of A in VT
2774 * (Workspace: need BDSPAC)
2775 *
2776 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2777 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2778 $ INFO )
2779 *
2780 END IF
2781 *
2782 END IF
2783 *
2784 ELSE IF( WNTVA ) THEN
2785 *
2786 IF( WNTUN ) THEN
2787 *
2788 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2789 * N right singular vectors to be computed in VT and
2790 * no left singular vectors to be computed
2791 *
2792 IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2793 *
2794 * Sufficient workspace for a fast algorithm
2795 *
2796 IR = 1
2797 IF( LWORK.GE.WRKBL+LDA*M ) THEN
2798 *
2799 * WORK(IR) is LDA by M
2800 *
2801 LDWRKR = LDA
2802 ELSE
2803 *
2804 * WORK(IR) is M by M
2805 *
2806 LDWRKR = M
2807 END IF
2808 ITAU = IR + LDWRKR*M
2809 IWORK = ITAU + M
2810 *
2811 * Compute A=L*Q, copying result to VT
2812 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2813 *
2814 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2815 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2816 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2817 *
2818 * Copy L to WORK(IR), zeroing out above it
2819 *
2820 CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2821 $ LDWRKR )
2822 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2823 $ WORK( IR+LDWRKR ), LDWRKR )
2824 *
2825 * Generate Q in VT
2826 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2827 *
2828 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2829 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2830 IE = ITAU
2831 ITAUQ = IE + M
2832 ITAUP = ITAUQ + M
2833 IWORK = ITAUP + M
2834 *
2835 * Bidiagonalize L in WORK(IR)
2836 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2837 *
2838 CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2839 $ WORK( IE ), WORK( ITAUQ ),
2840 $ WORK( ITAUP ), WORK( IWORK ),
2841 $ LWORK-IWORK+1, IERR )
2842 *
2843 * Generate right bidiagonalizing vectors in WORK(IR)
2844 * (Workspace: need M*M+4*M-1,
2845 * prefer M*M+3*M+(M-1)*NB)
2846 *
2847 CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2848 $ WORK( ITAUP ), WORK( IWORK ),
2849 $ LWORK-IWORK+1, IERR )
2850 IWORK = IE + M
2851 *
2852 * Perform bidiagonal QR iteration, computing right
2853 * singular vectors of L in WORK(IR)
2854 * (Workspace: need M*M+BDSPAC)
2855 *
2856 CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2857 $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2858 $ WORK( IWORK ), INFO )
2859 *
2860 * Multiply right singular vectors of L in WORK(IR) by
2861 * Q in VT, storing result in A
2862 * (Workspace: need M*M)
2863 *
2864 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2865 $ LDWRKR, VT, LDVT, ZERO, A, LDA )
2866 *
2867 * Copy right singular vectors of A from A to VT
2868 *
2869 CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
2870 *
2871 ELSE
2872 *
2873 * Insufficient workspace for a fast algorithm
2874 *
2875 ITAU = 1
2876 IWORK = ITAU + M
2877 *
2878 * Compute A=L*Q, copying result to VT
2879 * (Workspace: need 2*M, prefer M+M*NB)
2880 *
2881 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2882 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2883 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2884 *
2885 * Generate Q in VT
2886 * (Workspace: need M+N, prefer M+N*NB)
2887 *
2888 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2889 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2890 IE = ITAU
2891 ITAUQ = IE + M
2892 ITAUP = ITAUQ + M
2893 IWORK = ITAUP + M
2894 *
2895 * Zero out above L in A
2896 *
2897 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2898 $ LDA )
2899 *
2900 * Bidiagonalize L in A
2901 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2902 *
2903 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2904 $ WORK( ITAUQ ), WORK( ITAUP ),
2905 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2906 *
2907 * Multiply right bidiagonalizing vectors in A by Q
2908 * in VT
2909 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2910 *
2911 CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2912 $ WORK( ITAUP ), VT, LDVT,
2913 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2914 IWORK = IE + M
2915 *
2916 * Perform bidiagonal QR iteration, computing right
2917 * singular vectors of A in VT
2918 * (Workspace: need BDSPAC)
2919 *
2920 CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2921 $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2922 $ INFO )
2923 *
2924 END IF
2925 *
2926 ELSE IF( WNTUO ) THEN
2927 *
2928 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
2929 * N right singular vectors to be computed in VT and
2930 * M left singular vectors to be overwritten on A
2931 *
2932 IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2933 *
2934 * Sufficient workspace for a fast algorithm
2935 *
2936 IU = 1
2937 IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2938 *
2939 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2940 *
2941 LDWRKU = LDA
2942 IR = IU + LDWRKU*M
2943 LDWRKR = LDA
2944 ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2945 *
2946 * WORK(IU) is LDA by M and WORK(IR) is M by M
2947 *
2948 LDWRKU = LDA
2949 IR = IU + LDWRKU*M
2950 LDWRKR = M
2951 ELSE
2952 *
2953 * WORK(IU) is M by M and WORK(IR) is M by M
2954 *
2955 LDWRKU = M
2956 IR = IU + LDWRKU*M
2957 LDWRKR = M
2958 END IF
2959 ITAU = IR + LDWRKR*M
2960 IWORK = ITAU + M
2961 *
2962 * Compute A=L*Q, copying result to VT
2963 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2964 *
2965 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2966 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2967 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2968 *
2969 * Generate Q in VT
2970 * (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
2971 *
2972 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2973 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2974 *
2975 * Copy L to WORK(IU), zeroing out above it
2976 *
2977 CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2978 $ LDWRKU )
2979 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2980 $ WORK( IU+LDWRKU ), LDWRKU )
2981 IE = ITAU
2982 ITAUQ = IE + M
2983 ITAUP = ITAUQ + M
2984 IWORK = ITAUP + M
2985 *
2986 * Bidiagonalize L in WORK(IU), copying result to
2987 * WORK(IR)
2988 * (Workspace: need 2*M*M+4*M,
2989 * prefer 2*M*M+3*M+2*M*NB)
2990 *
2991 CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2992 $ WORK( IE ), WORK( ITAUQ ),
2993 $ WORK( ITAUP ), WORK( IWORK ),
2994 $ LWORK-IWORK+1, IERR )
2995 CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2996 $ WORK( IR ), LDWRKR )
2997 *
2998 * Generate right bidiagonalizing vectors in WORK(IU)
2999 * (Workspace: need 2*M*M+4*M-1,
3000 * prefer 2*M*M+3*M+(M-1)*NB)
3001 *
3002 CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3003 $ WORK( ITAUP ), WORK( IWORK ),
3004 $ LWORK-IWORK+1, IERR )
3005 *
3006 * Generate left bidiagonalizing vectors in WORK(IR)
3007 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3008 *
3009 CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3010 $ WORK( ITAUQ ), WORK( IWORK ),
3011 $ LWORK-IWORK+1, IERR )
3012 IWORK = IE + M
3013 *
3014 * Perform bidiagonal QR iteration, computing left
3015 * singular vectors of L in WORK(IR) and computing
3016 * right singular vectors of L in WORK(IU)
3017 * (Workspace: need 2*M*M+BDSPAC)
3018 *
3019 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3020 $ WORK( IU ), LDWRKU, WORK( IR ),
3021 $ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3022 *
3023 * Multiply right singular vectors of L in WORK(IU) by
3024 * Q in VT, storing result in A
3025 * (Workspace: need M*M)
3026 *
3027 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3028 $ LDWRKU, VT, LDVT, ZERO, A, LDA )
3029 *
3030 * Copy right singular vectors of A from A to VT
3031 *
3032 CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3033 *
3034 * Copy left singular vectors of A from WORK(IR) to A
3035 *
3036 CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3037 $ LDA )
3038 *
3039 ELSE
3040 *
3041 * Insufficient workspace for a fast algorithm
3042 *
3043 ITAU = 1
3044 IWORK = ITAU + M
3045 *
3046 * Compute A=L*Q, copying result to VT
3047 * (Workspace: need 2*M, prefer M+M*NB)
3048 *
3049 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3050 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3051 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3052 *
3053 * Generate Q in VT
3054 * (Workspace: need M+N, prefer M+N*NB)
3055 *
3056 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3057 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3058 IE = ITAU
3059 ITAUQ = IE + M
3060 ITAUP = ITAUQ + M
3061 IWORK = ITAUP + M
3062 *
3063 * Zero out above L in A
3064 *
3065 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
3066 $ LDA )
3067 *
3068 * Bidiagonalize L in A
3069 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3070 *
3071 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
3072 $ WORK( ITAUQ ), WORK( ITAUP ),
3073 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3074 *
3075 * Multiply right bidiagonalizing vectors in A by Q
3076 * in VT
3077 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3078 *
3079 CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3080 $ WORK( ITAUP ), VT, LDVT,
3081 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3082 *
3083 * Generate left bidiagonalizing vectors in A
3084 * (Workspace: need 4*M, prefer 3*M+M*NB)
3085 *
3086 CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3087 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3088 IWORK = IE + M
3089 *
3090 * Perform bidiagonal QR iteration, computing left
3091 * singular vectors of A in A and computing right
3092 * singular vectors of A in VT
3093 * (Workspace: need BDSPAC)
3094 *
3095 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3096 $ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3097 $ INFO )
3098 *
3099 END IF
3100 *
3101 ELSE IF( WNTUAS ) THEN
3102 *
3103 * Path 9t(N much larger than M, JOBU='S' or 'A',
3104 * JOBVT='A')
3105 * N right singular vectors to be computed in VT and
3106 * M left singular vectors to be computed in U
3107 *
3108 IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3109 *
3110 * Sufficient workspace for a fast algorithm
3111 *
3112 IU = 1
3113 IF( LWORK.GE.WRKBL+LDA*M ) THEN
3114 *
3115 * WORK(IU) is LDA by M
3116 *
3117 LDWRKU = LDA
3118 ELSE
3119 *
3120 * WORK(IU) is M by M
3121 *
3122 LDWRKU = M
3123 END IF
3124 ITAU = IU + LDWRKU*M
3125 IWORK = ITAU + M
3126 *
3127 * Compute A=L*Q, copying result to VT
3128 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3129 *
3130 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3131 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3132 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3133 *
3134 * Generate Q in VT
3135 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3136 *
3137 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3138 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3139 *
3140 * Copy L to WORK(IU), zeroing out above it
3141 *
3142 CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3143 $ LDWRKU )
3144 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3145 $ WORK( IU+LDWRKU ), LDWRKU )
3146 IE = ITAU
3147 ITAUQ = IE + M
3148 ITAUP = ITAUQ + M
3149 IWORK = ITAUP + M
3150 *
3151 * Bidiagonalize L in WORK(IU), copying result to U
3152 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3153 *
3154 CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3155 $ WORK( IE ), WORK( ITAUQ ),
3156 $ WORK( ITAUP ), WORK( IWORK ),
3157 $ LWORK-IWORK+1, IERR )
3158 CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3159 $ LDU )
3160 *
3161 * Generate right bidiagonalizing vectors in WORK(IU)
3162 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3163 *
3164 CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3165 $ WORK( ITAUP ), WORK( IWORK ),
3166 $ LWORK-IWORK+1, IERR )
3167 *
3168 * Generate left bidiagonalizing vectors in U
3169 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3170 *
3171 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3172 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3173 IWORK = IE + M
3174 *
3175 * Perform bidiagonal QR iteration, computing left
3176 * singular vectors of L in U and computing right
3177 * singular vectors of L in WORK(IU)
3178 * (Workspace: need M*M+BDSPAC)
3179 *
3180 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3181 $ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3182 $ WORK( IWORK ), INFO )
3183 *
3184 * Multiply right singular vectors of L in WORK(IU) by
3185 * Q in VT, storing result in A
3186 * (Workspace: need M*M)
3187 *
3188 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3189 $ LDWRKU, VT, LDVT, ZERO, A, LDA )
3190 *
3191 * Copy right singular vectors of A from A to VT
3192 *
3193 CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3194 *
3195 ELSE
3196 *
3197 * Insufficient workspace for a fast algorithm
3198 *
3199 ITAU = 1
3200 IWORK = ITAU + M
3201 *
3202 * Compute A=L*Q, copying result to VT
3203 * (Workspace: need 2*M, prefer M+M*NB)
3204 *
3205 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3206 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3207 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3208 *
3209 * Generate Q in VT
3210 * (Workspace: need M+N, prefer M+N*NB)
3211 *
3212 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3213 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3214 *
3215 * Copy L to U, zeroing out above it
3216 *
3217 CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3218 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
3219 $ LDU )
3220 IE = ITAU
3221 ITAUQ = IE + M
3222 ITAUP = ITAUQ + M
3223 IWORK = ITAUP + M
3224 *
3225 * Bidiagonalize L in U
3226 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3227 *
3228 CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
3229 $ WORK( ITAUQ ), WORK( ITAUP ),
3230 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3231 *
3232 * Multiply right bidiagonalizing vectors in U by Q
3233 * in VT
3234 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3235 *
3236 CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
3237 $ WORK( ITAUP ), VT, LDVT,
3238 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3239 *
3240 * Generate left bidiagonalizing vectors in U
3241 * (Workspace: need 4*M, prefer 3*M+M*NB)
3242 *
3243 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3244 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3245 IWORK = IE + M
3246 *
3247 * Perform bidiagonal QR iteration, computing left
3248 * singular vectors of A in U and computing right
3249 * singular vectors of A in VT
3250 * (Workspace: need BDSPAC)
3251 *
3252 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3253 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3254 $ INFO )
3255 *
3256 END IF
3257 *
3258 END IF
3259 *
3260 END IF
3261 *
3262 ELSE
3263 *
3264 * N .LT. MNTHR
3265 *
3266 * Path 10t(N greater than M, but not much larger)
3267 * Reduce to bidiagonal form without LQ decomposition
3268 *
3269 IE = 1
3270 ITAUQ = IE + M
3271 ITAUP = ITAUQ + M
3272 IWORK = ITAUP + M
3273 *
3274 * Bidiagonalize A
3275 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3276 *
3277 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3278 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3279 $ IERR )
3280 IF( WNTUAS ) THEN
3281 *
3282 * If left singular vectors desired in U, copy result to U
3283 * and generate left bidiagonalizing vectors in U
3284 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3285 *
3286 CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3287 CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3288 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3289 END IF
3290 IF( WNTVAS ) THEN
3291 *
3292 * If right singular vectors desired in VT, copy result to
3293 * VT and generate right bidiagonalizing vectors in VT
3294 * (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3295 *
3296 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3297 IF( WNTVA )
3298 $ NRVT = N
3299 IF( WNTVS )
3300 $ NRVT = M
3301 CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3302 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3303 END IF
3304 IF( WNTUO ) THEN
3305 *
3306 * If left singular vectors desired in A, generate left
3307 * bidiagonalizing vectors in A
3308 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3309 *
3310 CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3311 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3312 END IF
3313 IF( WNTVO ) THEN
3314 *
3315 * If right singular vectors desired in A, generate right
3316 * bidiagonalizing vectors in A
3317 * (Workspace: need 4*M, prefer 3*M+M*NB)
3318 *
3319 CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3320 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3321 END IF
3322 IWORK = IE + M
3323 IF( WNTUAS .OR. WNTUO )
3324 $ NRU = M
3325 IF( WNTUN )
3326 $ NRU = 0
3327 IF( WNTVAS .OR. WNTVO )
3328 $ NCVT = N
3329 IF( WNTVN )
3330 $ NCVT = 0
3331 IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3332 *
3333 * Perform bidiagonal QR iteration, if desired, computing
3334 * left singular vectors in U and computing right singular
3335 * vectors in VT
3336 * (Workspace: need BDSPAC)
3337 *
3338 CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3339 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3340 ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3341 *
3342 * Perform bidiagonal QR iteration, if desired, computing
3343 * left singular vectors in U and computing right singular
3344 * vectors in A
3345 * (Workspace: need BDSPAC)
3346 *
3347 CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3348 $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
3349 ELSE
3350 *
3351 * Perform bidiagonal QR iteration, if desired, computing
3352 * left singular vectors in A and computing right singular
3353 * vectors in VT
3354 * (Workspace: need BDSPAC)
3355 *
3356 CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3357 $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3358 END IF
3359 *
3360 END IF
3361 *
3362 END IF
3363 *
3364 * If DBDSQR failed to converge, copy unconverged superdiagonals
3365 * to WORK( 2:MINMN )
3366 *
3367 IF( INFO.NE.0 ) THEN
3368 IF( IE.GT.2 ) THEN
3369 DO 50 I = 1, MINMN - 1
3370 WORK( I+1 ) = WORK( I+IE-1 )
3371 50 CONTINUE
3372 END IF
3373 IF( IE.LT.2 ) THEN
3374 DO 60 I = MINMN - 1, 1, -1
3375 WORK( I+1 ) = WORK( I+IE-1 )
3376 60 CONTINUE
3377 END IF
3378 END IF
3379 *
3380 * Undo scaling if necessary
3381 *
3382 IF( ISCL.EQ.1 ) THEN
3383 IF( ANRM.GT.BIGNUM )
3384 $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3385 $ IERR )
3386 IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3387 $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3388 $ MINMN, IERR )
3389 IF( ANRM.LT.SMLNUM )
3390 $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3391 $ IERR )
3392 IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3393 $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3394 $ MINMN, IERR )
3395 END IF
3396 *
3397 * Return optimal workspace in WORK(1)
3398 *
3399 WORK( 1 ) = MAXWRK
3400 *
3401 RETURN
3402 *
3403 * End of DGESVD
3404 *
3405 END
2 $ WORK, LWORK, INFO )
3 *
4 * -- LAPACK driver routine (version 3.3.1) --
5 * -- LAPACK is a software package provided by Univ. of Tennessee, --
6 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
7 * -- April 2011 --
8 *
9 * .. Scalar Arguments ..
10 CHARACTER JOBU, JOBVT
11 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
12 * ..
13 * .. Array Arguments ..
14 DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
15 $ VT( LDVT, * ), WORK( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * DGESVD computes the singular value decomposition (SVD) of a real
22 * M-by-N matrix A, optionally computing the left and/or right singular
23 * vectors. The SVD is written
24 *
25 * A = U * SIGMA * transpose(V)
26 *
27 * where SIGMA is an M-by-N matrix which is zero except for its
28 * min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
29 * V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
30 * are the singular values of A; they are real and non-negative, and
31 * are returned in descending order. The first min(m,n) columns of
32 * U and V are the left and right singular vectors of A.
33 *
34 * Note that the routine returns V**T, not V.
35 *
36 * Arguments
37 * =========
38 *
39 * JOBU (input) CHARACTER*1
40 * Specifies options for computing all or part of the matrix U:
41 * = 'A': all M columns of U are returned in array U:
42 * = 'S': the first min(m,n) columns of U (the left singular
43 * vectors) are returned in the array U;
44 * = 'O': the first min(m,n) columns of U (the left singular
45 * vectors) are overwritten on the array A;
46 * = 'N': no columns of U (no left singular vectors) are
47 * computed.
48 *
49 * JOBVT (input) CHARACTER*1
50 * Specifies options for computing all or part of the matrix
51 * V**T:
52 * = 'A': all N rows of V**T are returned in the array VT;
53 * = 'S': the first min(m,n) rows of V**T (the right singular
54 * vectors) are returned in the array VT;
55 * = 'O': the first min(m,n) rows of V**T (the right singular
56 * vectors) are overwritten on the array A;
57 * = 'N': no rows of V**T (no right singular vectors) are
58 * computed.
59 *
60 * JOBVT and JOBU cannot both be 'O'.
61 *
62 * M (input) INTEGER
63 * The number of rows of the input matrix A. M >= 0.
64 *
65 * N (input) INTEGER
66 * The number of columns of the input matrix A. N >= 0.
67 *
68 * A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
69 * On entry, the M-by-N matrix A.
70 * On exit,
71 * if JOBU = 'O', A is overwritten with the first min(m,n)
72 * columns of U (the left singular vectors,
73 * stored columnwise);
74 * if JOBVT = 'O', A is overwritten with the first min(m,n)
75 * rows of V**T (the right singular vectors,
76 * stored rowwise);
77 * if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
78 * are destroyed.
79 *
80 * LDA (input) INTEGER
81 * The leading dimension of the array A. LDA >= max(1,M).
82 *
83 * S (output) DOUBLE PRECISION array, dimension (min(M,N))
84 * The singular values of A, sorted so that S(i) >= S(i+1).
85 *
86 * U (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
87 * (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
88 * If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
89 * if JOBU = 'S', U contains the first min(m,n) columns of U
90 * (the left singular vectors, stored columnwise);
91 * if JOBU = 'N' or 'O', U is not referenced.
92 *
93 * LDU (input) INTEGER
94 * The leading dimension of the array U. LDU >= 1; if
95 * JOBU = 'S' or 'A', LDU >= M.
96 *
97 * VT (output) DOUBLE PRECISION array, dimension (LDVT,N)
98 * If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
99 * V**T;
100 * if JOBVT = 'S', VT contains the first min(m,n) rows of
101 * V**T (the right singular vectors, stored rowwise);
102 * if JOBVT = 'N' or 'O', VT is not referenced.
103 *
104 * LDVT (input) INTEGER
105 * The leading dimension of the array VT. LDVT >= 1; if
106 * JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
107 *
108 * WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
109 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
110 * if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
111 * superdiagonal elements of an upper bidiagonal matrix B
112 * whose diagonal is in S (not necessarily sorted). B
113 * satisfies A = U * B * VT, so it has the same singular values
114 * as A, and singular vectors related by U and VT.
115 *
116 * LWORK (input) INTEGER
117 * The dimension of the array WORK.
118 * LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
119 * - PATH 1 (M much larger than N, JOBU='N')
120 * - PATH 1t (N much larger than M, JOBVT='N')
121 * LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
122 * For good performance, LWORK should generally be larger.
123 *
124 * If LWORK = -1, then a workspace query is assumed; the routine
125 * only calculates the optimal size of the WORK array, returns
126 * this value as the first entry of the WORK array, and no error
127 * message related to LWORK is issued by XERBLA.
128 *
129 * INFO (output) INTEGER
130 * = 0: successful exit.
131 * < 0: if INFO = -i, the i-th argument had an illegal value.
132 * > 0: if DBDSQR did not converge, INFO specifies how many
133 * superdiagonals of an intermediate bidiagonal form B
134 * did not converge to zero. See the description of WORK
135 * above for details.
136 *
137 * =====================================================================
138 *
139 * .. Parameters ..
140 DOUBLE PRECISION ZERO, ONE
141 PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
142 * ..
143 * .. Local Scalars ..
144 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
145 $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
146 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
147 $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
148 $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
149 $ NRVT, WRKBL
150 DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
151 * ..
152 * .. Local Arrays ..
153 DOUBLE PRECISION DUM( 1 )
154 * ..
155 * .. External Subroutines ..
156 EXTERNAL DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
157 $ DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
158 $ XERBLA
159 * ..
160 * .. External Functions ..
161 LOGICAL LSAME
162 INTEGER ILAENV
163 DOUBLE PRECISION DLAMCH, DLANGE
164 EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE
165 * ..
166 * .. Intrinsic Functions ..
167 INTRINSIC MAX, MIN, SQRT
168 * ..
169 * .. Executable Statements ..
170 *
171 * Test the input arguments
172 *
173 INFO = 0
174 MINMN = MIN( M, N )
175 WNTUA = LSAME( JOBU, 'A' )
176 WNTUS = LSAME( JOBU, 'S' )
177 WNTUAS = WNTUA .OR. WNTUS
178 WNTUO = LSAME( JOBU, 'O' )
179 WNTUN = LSAME( JOBU, 'N' )
180 WNTVA = LSAME( JOBVT, 'A' )
181 WNTVS = LSAME( JOBVT, 'S' )
182 WNTVAS = WNTVA .OR. WNTVS
183 WNTVO = LSAME( JOBVT, 'O' )
184 WNTVN = LSAME( JOBVT, 'N' )
185 LQUERY = ( LWORK.EQ.-1 )
186 *
187 IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
188 INFO = -1
189 ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
190 $ ( WNTVO .AND. WNTUO ) ) THEN
191 INFO = -2
192 ELSE IF( M.LT.0 ) THEN
193 INFO = -3
194 ELSE IF( N.LT.0 ) THEN
195 INFO = -4
196 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
197 INFO = -6
198 ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
199 INFO = -9
200 ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
201 $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
202 INFO = -11
203 END IF
204 *
205 * Compute workspace
206 * (Note: Comments in the code beginning "Workspace:" describe the
207 * minimal amount of workspace needed at that point in the code,
208 * as well as the preferred amount for good performance.
209 * NB refers to the optimal block size for the immediately
210 * following subroutine, as returned by ILAENV.)
211 *
212 IF( INFO.EQ.0 ) THEN
213 MINWRK = 1
214 MAXWRK = 1
215 IF( M.GE.N .AND. MINMN.GT.0 ) THEN
216 *
217 * Compute space needed for DBDSQR
218 *
219 MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
220 BDSPAC = 5*N
221 IF( M.GE.MNTHR ) THEN
222 IF( WNTUN ) THEN
223 *
224 * Path 1 (M much larger than N, JOBU='N')
225 *
226 MAXWRK = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1,
227 $ -1 )
228 MAXWRK = MAX( MAXWRK, 3*N+2*N*
229 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
230 IF( WNTVO .OR. WNTVAS )
231 $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
232 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
233 MAXWRK = MAX( MAXWRK, BDSPAC )
234 MINWRK = MAX( 4*N, BDSPAC )
235 ELSE IF( WNTUO .AND. WNTVN ) THEN
236 *
237 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
238 *
239 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
240 WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
241 $ N, N, -1 ) )
242 WRKBL = MAX( WRKBL, 3*N+2*N*
243 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
244 WRKBL = MAX( WRKBL, 3*N+N*
245 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
246 WRKBL = MAX( WRKBL, BDSPAC )
247 MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
248 MINWRK = MAX( 3*N+M, BDSPAC )
249 ELSE IF( WNTUO .AND. WNTVAS ) THEN
250 *
251 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
252 * 'A')
253 *
254 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
255 WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
256 $ N, N, -1 ) )
257 WRKBL = MAX( WRKBL, 3*N+2*N*
258 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
259 WRKBL = MAX( WRKBL, 3*N+N*
260 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
261 WRKBL = MAX( WRKBL, 3*N+( N-1 )*
262 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
263 WRKBL = MAX( WRKBL, BDSPAC )
264 MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
265 MINWRK = MAX( 3*N+M, BDSPAC )
266 ELSE IF( WNTUS .AND. WNTVN ) THEN
267 *
268 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
269 *
270 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
271 WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
272 $ N, N, -1 ) )
273 WRKBL = MAX( WRKBL, 3*N+2*N*
274 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
275 WRKBL = MAX( WRKBL, 3*N+N*
276 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
277 WRKBL = MAX( WRKBL, BDSPAC )
278 MAXWRK = N*N + WRKBL
279 MINWRK = MAX( 3*N+M, BDSPAC )
280 ELSE IF( WNTUS .AND. WNTVO ) THEN
281 *
282 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
283 *
284 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
285 WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
286 $ N, N, -1 ) )
287 WRKBL = MAX( WRKBL, 3*N+2*N*
288 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
289 WRKBL = MAX( WRKBL, 3*N+N*
290 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
291 WRKBL = MAX( WRKBL, 3*N+( N-1 )*
292 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
293 WRKBL = MAX( WRKBL, BDSPAC )
294 MAXWRK = 2*N*N + WRKBL
295 MINWRK = MAX( 3*N+M, BDSPAC )
296 ELSE IF( WNTUS .AND. WNTVAS ) THEN
297 *
298 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
299 * 'A')
300 *
301 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
302 WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
303 $ N, N, -1 ) )
304 WRKBL = MAX( WRKBL, 3*N+2*N*
305 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
306 WRKBL = MAX( WRKBL, 3*N+N*
307 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
308 WRKBL = MAX( WRKBL, 3*N+( N-1 )*
309 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
310 WRKBL = MAX( WRKBL, BDSPAC )
311 MAXWRK = N*N + WRKBL
312 MINWRK = MAX( 3*N+M, BDSPAC )
313 ELSE IF( WNTUA .AND. WNTVN ) THEN
314 *
315 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
316 *
317 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
318 WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
319 $ M, N, -1 ) )
320 WRKBL = MAX( WRKBL, 3*N+2*N*
321 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
322 WRKBL = MAX( WRKBL, 3*N+N*
323 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
324 WRKBL = MAX( WRKBL, BDSPAC )
325 MAXWRK = N*N + WRKBL
326 MINWRK = MAX( 3*N+M, BDSPAC )
327 ELSE IF( WNTUA .AND. WNTVO ) THEN
328 *
329 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
330 *
331 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
332 WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
333 $ M, N, -1 ) )
334 WRKBL = MAX( WRKBL, 3*N+2*N*
335 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
336 WRKBL = MAX( WRKBL, 3*N+N*
337 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
338 WRKBL = MAX( WRKBL, 3*N+( N-1 )*
339 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
340 WRKBL = MAX( WRKBL, BDSPAC )
341 MAXWRK = 2*N*N + WRKBL
342 MINWRK = MAX( 3*N+M, BDSPAC )
343 ELSE IF( WNTUA .AND. WNTVAS ) THEN
344 *
345 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
346 * 'A')
347 *
348 WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
349 WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
350 $ M, N, -1 ) )
351 WRKBL = MAX( WRKBL, 3*N+2*N*
352 $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
353 WRKBL = MAX( WRKBL, 3*N+N*
354 $ ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
355 WRKBL = MAX( WRKBL, 3*N+( N-1 )*
356 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
357 WRKBL = MAX( WRKBL, BDSPAC )
358 MAXWRK = N*N + WRKBL
359 MINWRK = MAX( 3*N+M, BDSPAC )
360 END IF
361 ELSE
362 *
363 * Path 10 (M at least N, but not much larger)
364 *
365 MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
366 $ -1, -1 )
367 IF( WNTUS .OR. WNTUO )
368 $ MAXWRK = MAX( MAXWRK, 3*N+N*
369 $ ILAENV( 1, 'DORGBR', 'Q', M, N, N, -1 ) )
370 IF( WNTUA )
371 $ MAXWRK = MAX( MAXWRK, 3*N+M*
372 $ ILAENV( 1, 'DORGBR', 'Q', M, M, N, -1 ) )
373 IF( .NOT.WNTVN )
374 $ MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
375 $ ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
376 MAXWRK = MAX( MAXWRK, BDSPAC )
377 MINWRK = MAX( 3*N+M, BDSPAC )
378 END IF
379 ELSE IF( MINMN.GT.0 ) THEN
380 *
381 * Compute space needed for DBDSQR
382 *
383 MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
384 BDSPAC = 5*M
385 IF( N.GE.MNTHR ) THEN
386 IF( WNTVN ) THEN
387 *
388 * Path 1t(N much larger than M, JOBVT='N')
389 *
390 MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1,
391 $ -1 )
392 MAXWRK = MAX( MAXWRK, 3*M+2*M*
393 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
394 IF( WNTUO .OR. WNTUAS )
395 $ MAXWRK = MAX( MAXWRK, 3*M+M*
396 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
397 MAXWRK = MAX( MAXWRK, BDSPAC )
398 MINWRK = MAX( 4*M, BDSPAC )
399 ELSE IF( WNTVO .AND. WNTUN ) THEN
400 *
401 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
402 *
403 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
404 WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
405 $ N, M, -1 ) )
406 WRKBL = MAX( WRKBL, 3*M+2*M*
407 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
408 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
409 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
410 WRKBL = MAX( WRKBL, BDSPAC )
411 MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
412 MINWRK = MAX( 3*M+N, BDSPAC )
413 ELSE IF( WNTVO .AND. WNTUAS ) THEN
414 *
415 * Path 3t(N much larger than M, JOBU='S' or 'A',
416 * JOBVT='O')
417 *
418 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
419 WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
420 $ N, M, -1 ) )
421 WRKBL = MAX( WRKBL, 3*M+2*M*
422 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
423 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
424 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
425 WRKBL = MAX( WRKBL, 3*M+M*
426 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
427 WRKBL = MAX( WRKBL, BDSPAC )
428 MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
429 MINWRK = MAX( 3*M+N, BDSPAC )
430 ELSE IF( WNTVS .AND. WNTUN ) THEN
431 *
432 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
433 *
434 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
435 WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
436 $ N, M, -1 ) )
437 WRKBL = MAX( WRKBL, 3*M+2*M*
438 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
439 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
440 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
441 WRKBL = MAX( WRKBL, BDSPAC )
442 MAXWRK = M*M + WRKBL
443 MINWRK = MAX( 3*M+N, BDSPAC )
444 ELSE IF( WNTVS .AND. WNTUO ) THEN
445 *
446 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
447 *
448 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
449 WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
450 $ N, M, -1 ) )
451 WRKBL = MAX( WRKBL, 3*M+2*M*
452 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
453 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
454 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
455 WRKBL = MAX( WRKBL, 3*M+M*
456 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
457 WRKBL = MAX( WRKBL, BDSPAC )
458 MAXWRK = 2*M*M + WRKBL
459 MINWRK = MAX( 3*M+N, BDSPAC )
460 ELSE IF( WNTVS .AND. WNTUAS ) THEN
461 *
462 * Path 6t(N much larger than M, JOBU='S' or 'A',
463 * JOBVT='S')
464 *
465 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
466 WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
467 $ N, M, -1 ) )
468 WRKBL = MAX( WRKBL, 3*M+2*M*
469 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
470 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
471 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
472 WRKBL = MAX( WRKBL, 3*M+M*
473 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
474 WRKBL = MAX( WRKBL, BDSPAC )
475 MAXWRK = M*M + WRKBL
476 MINWRK = MAX( 3*M+N, BDSPAC )
477 ELSE IF( WNTVA .AND. WNTUN ) THEN
478 *
479 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
480 *
481 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
482 WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
483 $ N, M, -1 ) )
484 WRKBL = MAX( WRKBL, 3*M+2*M*
485 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
486 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
487 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
488 WRKBL = MAX( WRKBL, BDSPAC )
489 MAXWRK = M*M + WRKBL
490 MINWRK = MAX( 3*M+N, BDSPAC )
491 ELSE IF( WNTVA .AND. WNTUO ) THEN
492 *
493 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
494 *
495 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
496 WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
497 $ N, M, -1 ) )
498 WRKBL = MAX( WRKBL, 3*M+2*M*
499 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
500 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
501 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
502 WRKBL = MAX( WRKBL, 3*M+M*
503 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
504 WRKBL = MAX( WRKBL, BDSPAC )
505 MAXWRK = 2*M*M + WRKBL
506 MINWRK = MAX( 3*M+N, BDSPAC )
507 ELSE IF( WNTVA .AND. WNTUAS ) THEN
508 *
509 * Path 9t(N much larger than M, JOBU='S' or 'A',
510 * JOBVT='A')
511 *
512 WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
513 WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
514 $ N, M, -1 ) )
515 WRKBL = MAX( WRKBL, 3*M+2*M*
516 $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
517 WRKBL = MAX( WRKBL, 3*M+( M-1 )*
518 $ ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
519 WRKBL = MAX( WRKBL, 3*M+M*
520 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
521 WRKBL = MAX( WRKBL, BDSPAC )
522 MAXWRK = M*M + WRKBL
523 MINWRK = MAX( 3*M+N, BDSPAC )
524 END IF
525 ELSE
526 *
527 * Path 10t(N greater than M, but not much larger)
528 *
529 MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
530 $ -1, -1 )
531 IF( WNTVS .OR. WNTVO )
532 $ MAXWRK = MAX( MAXWRK, 3*M+M*
533 $ ILAENV( 1, 'DORGBR', 'P', M, N, M, -1 ) )
534 IF( WNTVA )
535 $ MAXWRK = MAX( MAXWRK, 3*M+N*
536 $ ILAENV( 1, 'DORGBR', 'P', N, N, M, -1 ) )
537 IF( .NOT.WNTUN )
538 $ MAXWRK = MAX( MAXWRK, 3*M+( M-1 )*
539 $ ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
540 MAXWRK = MAX( MAXWRK, BDSPAC )
541 MINWRK = MAX( 3*M+N, BDSPAC )
542 END IF
543 END IF
544 MAXWRK = MAX( MAXWRK, MINWRK )
545 WORK( 1 ) = MAXWRK
546 *
547 IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
548 INFO = -13
549 END IF
550 END IF
551 *
552 IF( INFO.NE.0 ) THEN
553 CALL XERBLA( 'DGESVD', -INFO )
554 RETURN
555 ELSE IF( LQUERY ) THEN
556 RETURN
557 END IF
558 *
559 * Quick return if possible
560 *
561 IF( M.EQ.0 .OR. N.EQ.0 ) THEN
562 RETURN
563 END IF
564 *
565 * Get machine constants
566 *
567 EPS = DLAMCH( 'P' )
568 SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
569 BIGNUM = ONE / SMLNUM
570 *
571 * Scale A if max element outside range [SMLNUM,BIGNUM]
572 *
573 ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
574 ISCL = 0
575 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
576 ISCL = 1
577 CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
578 ELSE IF( ANRM.GT.BIGNUM ) THEN
579 ISCL = 1
580 CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
581 END IF
582 *
583 IF( M.GE.N ) THEN
584 *
585 * A has at least as many rows as columns. If A has sufficiently
586 * more rows than columns, first reduce using the QR
587 * decomposition (if sufficient workspace available)
588 *
589 IF( M.GE.MNTHR ) THEN
590 *
591 IF( WNTUN ) THEN
592 *
593 * Path 1 (M much larger than N, JOBU='N')
594 * No left singular vectors to be computed
595 *
596 ITAU = 1
597 IWORK = ITAU + N
598 *
599 * Compute A=Q*R
600 * (Workspace: need 2*N, prefer N+N*NB)
601 *
602 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
603 $ LWORK-IWORK+1, IERR )
604 *
605 * Zero out below R
606 *
607 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
608 IE = 1
609 ITAUQ = IE + N
610 ITAUP = ITAUQ + N
611 IWORK = ITAUP + N
612 *
613 * Bidiagonalize R in A
614 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
615 *
616 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
617 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
618 $ IERR )
619 NCVT = 0
620 IF( WNTVO .OR. WNTVAS ) THEN
621 *
622 * If right singular vectors desired, generate P'.
623 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
624 *
625 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
626 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
627 NCVT = N
628 END IF
629 IWORK = IE + N
630 *
631 * Perform bidiagonal QR iteration, computing right
632 * singular vectors of A in A if desired
633 * (Workspace: need BDSPAC)
634 *
635 CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
636 $ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
637 *
638 * If right singular vectors desired in VT, copy them there
639 *
640 IF( WNTVAS )
641 $ CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
642 *
643 ELSE IF( WNTUO .AND. WNTVN ) THEN
644 *
645 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
646 * N left singular vectors to be overwritten on A and
647 * no right singular vectors to be computed
648 *
649 IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
650 *
651 * Sufficient workspace for a fast algorithm
652 *
653 IR = 1
654 IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
655 *
656 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
657 *
658 LDWRKU = LDA
659 LDWRKR = LDA
660 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
661 *
662 * WORK(IU) is LDA by N, WORK(IR) is N by N
663 *
664 LDWRKU = LDA
665 LDWRKR = N
666 ELSE
667 *
668 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
669 *
670 LDWRKU = ( LWORK-N*N-N ) / N
671 LDWRKR = N
672 END IF
673 ITAU = IR + LDWRKR*N
674 IWORK = ITAU + N
675 *
676 * Compute A=Q*R
677 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
678 *
679 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
680 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
681 *
682 * Copy R to WORK(IR) and zero out below it
683 *
684 CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
685 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
686 $ LDWRKR )
687 *
688 * Generate Q in A
689 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
690 *
691 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
692 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
693 IE = ITAU
694 ITAUQ = IE + N
695 ITAUP = ITAUQ + N
696 IWORK = ITAUP + N
697 *
698 * Bidiagonalize R in WORK(IR)
699 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
700 *
701 CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
702 $ WORK( ITAUQ ), WORK( ITAUP ),
703 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
704 *
705 * Generate left vectors bidiagonalizing R
706 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
707 *
708 CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
709 $ WORK( ITAUQ ), WORK( IWORK ),
710 $ LWORK-IWORK+1, IERR )
711 IWORK = IE + N
712 *
713 * Perform bidiagonal QR iteration, computing left
714 * singular vectors of R in WORK(IR)
715 * (Workspace: need N*N+BDSPAC)
716 *
717 CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
718 $ WORK( IR ), LDWRKR, DUM, 1,
719 $ WORK( IWORK ), INFO )
720 IU = IE + N
721 *
722 * Multiply Q in A by left singular vectors of R in
723 * WORK(IR), storing result in WORK(IU) and copying to A
724 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
725 *
726 DO 10 I = 1, M, LDWRKU
727 CHUNK = MIN( M-I+1, LDWRKU )
728 CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
729 $ LDA, WORK( IR ), LDWRKR, ZERO,
730 $ WORK( IU ), LDWRKU )
731 CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
732 $ A( I, 1 ), LDA )
733 10 CONTINUE
734 *
735 ELSE
736 *
737 * Insufficient workspace for a fast algorithm
738 *
739 IE = 1
740 ITAUQ = IE + N
741 ITAUP = ITAUQ + N
742 IWORK = ITAUP + N
743 *
744 * Bidiagonalize A
745 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
746 *
747 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
748 $ WORK( ITAUQ ), WORK( ITAUP ),
749 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
750 *
751 * Generate left vectors bidiagonalizing A
752 * (Workspace: need 4*N, prefer 3*N+N*NB)
753 *
754 CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
755 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
756 IWORK = IE + N
757 *
758 * Perform bidiagonal QR iteration, computing left
759 * singular vectors of A in A
760 * (Workspace: need BDSPAC)
761 *
762 CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
763 $ A, LDA, DUM, 1, WORK( IWORK ), INFO )
764 *
765 END IF
766 *
767 ELSE IF( WNTUO .AND. WNTVAS ) THEN
768 *
769 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
770 * N left singular vectors to be overwritten on A and
771 * N right singular vectors to be computed in VT
772 *
773 IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
774 *
775 * Sufficient workspace for a fast algorithm
776 *
777 IR = 1
778 IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
779 *
780 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
781 *
782 LDWRKU = LDA
783 LDWRKR = LDA
784 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
785 *
786 * WORK(IU) is LDA by N and WORK(IR) is N by N
787 *
788 LDWRKU = LDA
789 LDWRKR = N
790 ELSE
791 *
792 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
793 *
794 LDWRKU = ( LWORK-N*N-N ) / N
795 LDWRKR = N
796 END IF
797 ITAU = IR + LDWRKR*N
798 IWORK = ITAU + N
799 *
800 * Compute A=Q*R
801 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
802 *
803 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
804 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
805 *
806 * Copy R to VT, zeroing out below it
807 *
808 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
809 IF( N.GT.1 )
810 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
811 $ VT( 2, 1 ), LDVT )
812 *
813 * Generate Q in A
814 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
815 *
816 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
817 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
818 IE = ITAU
819 ITAUQ = IE + N
820 ITAUP = ITAUQ + N
821 IWORK = ITAUP + N
822 *
823 * Bidiagonalize R in VT, copying result to WORK(IR)
824 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
825 *
826 CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
827 $ WORK( ITAUQ ), WORK( ITAUP ),
828 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
829 CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
830 *
831 * Generate left vectors bidiagonalizing R in WORK(IR)
832 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
833 *
834 CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
835 $ WORK( ITAUQ ), WORK( IWORK ),
836 $ LWORK-IWORK+1, IERR )
837 *
838 * Generate right vectors bidiagonalizing R in VT
839 * (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
840 *
841 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
842 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
843 IWORK = IE + N
844 *
845 * Perform bidiagonal QR iteration, computing left
846 * singular vectors of R in WORK(IR) and computing right
847 * singular vectors of R in VT
848 * (Workspace: need N*N+BDSPAC)
849 *
850 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
851 $ WORK( IR ), LDWRKR, DUM, 1,
852 $ WORK( IWORK ), INFO )
853 IU = IE + N
854 *
855 * Multiply Q in A by left singular vectors of R in
856 * WORK(IR), storing result in WORK(IU) and copying to A
857 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
858 *
859 DO 20 I = 1, M, LDWRKU
860 CHUNK = MIN( M-I+1, LDWRKU )
861 CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
862 $ LDA, WORK( IR ), LDWRKR, ZERO,
863 $ WORK( IU ), LDWRKU )
864 CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
865 $ A( I, 1 ), LDA )
866 20 CONTINUE
867 *
868 ELSE
869 *
870 * Insufficient workspace for a fast algorithm
871 *
872 ITAU = 1
873 IWORK = ITAU + N
874 *
875 * Compute A=Q*R
876 * (Workspace: need 2*N, prefer N+N*NB)
877 *
878 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
879 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
880 *
881 * Copy R to VT, zeroing out below it
882 *
883 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
884 IF( N.GT.1 )
885 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
886 $ VT( 2, 1 ), LDVT )
887 *
888 * Generate Q in A
889 * (Workspace: need 2*N, prefer N+N*NB)
890 *
891 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
892 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
893 IE = ITAU
894 ITAUQ = IE + N
895 ITAUP = ITAUQ + N
896 IWORK = ITAUP + N
897 *
898 * Bidiagonalize R in VT
899 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
900 *
901 CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
902 $ WORK( ITAUQ ), WORK( ITAUP ),
903 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
904 *
905 * Multiply Q in A by left vectors bidiagonalizing R
906 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
907 *
908 CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
909 $ WORK( ITAUQ ), A, LDA, WORK( IWORK ),
910 $ LWORK-IWORK+1, IERR )
911 *
912 * Generate right vectors bidiagonalizing R in VT
913 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
914 *
915 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
916 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
917 IWORK = IE + N
918 *
919 * Perform bidiagonal QR iteration, computing left
920 * singular vectors of A in A and computing right
921 * singular vectors of A in VT
922 * (Workspace: need BDSPAC)
923 *
924 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
925 $ A, LDA, DUM, 1, WORK( IWORK ), INFO )
926 *
927 END IF
928 *
929 ELSE IF( WNTUS ) THEN
930 *
931 IF( WNTVN ) THEN
932 *
933 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
934 * N left singular vectors to be computed in U and
935 * no right singular vectors to be computed
936 *
937 IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
938 *
939 * Sufficient workspace for a fast algorithm
940 *
941 IR = 1
942 IF( LWORK.GE.WRKBL+LDA*N ) THEN
943 *
944 * WORK(IR) is LDA by N
945 *
946 LDWRKR = LDA
947 ELSE
948 *
949 * WORK(IR) is N by N
950 *
951 LDWRKR = N
952 END IF
953 ITAU = IR + LDWRKR*N
954 IWORK = ITAU + N
955 *
956 * Compute A=Q*R
957 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
958 *
959 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
960 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
961 *
962 * Copy R to WORK(IR), zeroing out below it
963 *
964 CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
965 $ LDWRKR )
966 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
967 $ WORK( IR+1 ), LDWRKR )
968 *
969 * Generate Q in A
970 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
971 *
972 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
973 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
974 IE = ITAU
975 ITAUQ = IE + N
976 ITAUP = ITAUQ + N
977 IWORK = ITAUP + N
978 *
979 * Bidiagonalize R in WORK(IR)
980 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
981 *
982 CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
983 $ WORK( IE ), WORK( ITAUQ ),
984 $ WORK( ITAUP ), WORK( IWORK ),
985 $ LWORK-IWORK+1, IERR )
986 *
987 * Generate left vectors bidiagonalizing R in WORK(IR)
988 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
989 *
990 CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
991 $ WORK( ITAUQ ), WORK( IWORK ),
992 $ LWORK-IWORK+1, IERR )
993 IWORK = IE + N
994 *
995 * Perform bidiagonal QR iteration, computing left
996 * singular vectors of R in WORK(IR)
997 * (Workspace: need N*N+BDSPAC)
998 *
999 CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1000 $ 1, WORK( IR ), LDWRKR, DUM, 1,
1001 $ WORK( IWORK ), INFO )
1002 *
1003 * Multiply Q in A by left singular vectors of R in
1004 * WORK(IR), storing result in U
1005 * (Workspace: need N*N)
1006 *
1007 CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1008 $ WORK( IR ), LDWRKR, ZERO, U, LDU )
1009 *
1010 ELSE
1011 *
1012 * Insufficient workspace for a fast algorithm
1013 *
1014 ITAU = 1
1015 IWORK = ITAU + N
1016 *
1017 * Compute A=Q*R, copying result to U
1018 * (Workspace: need 2*N, prefer N+N*NB)
1019 *
1020 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1021 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1022 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1023 *
1024 * Generate Q in U
1025 * (Workspace: need 2*N, prefer N+N*NB)
1026 *
1027 CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1028 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1029 IE = ITAU
1030 ITAUQ = IE + N
1031 ITAUP = ITAUQ + N
1032 IWORK = ITAUP + N
1033 *
1034 * Zero out below R in A
1035 *
1036 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1037 $ LDA )
1038 *
1039 * Bidiagonalize R in A
1040 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1041 *
1042 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1043 $ WORK( ITAUQ ), WORK( ITAUP ),
1044 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1045 *
1046 * Multiply Q in U by left vectors bidiagonalizing R
1047 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1048 *
1049 CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1050 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1051 $ LWORK-IWORK+1, IERR )
1052 IWORK = IE + N
1053 *
1054 * Perform bidiagonal QR iteration, computing left
1055 * singular vectors of A in U
1056 * (Workspace: need BDSPAC)
1057 *
1058 CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1059 $ 1, U, LDU, DUM, 1, WORK( IWORK ),
1060 $ INFO )
1061 *
1062 END IF
1063 *
1064 ELSE IF( WNTVO ) THEN
1065 *
1066 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1067 * N left singular vectors to be computed in U and
1068 * N right singular vectors to be overwritten on A
1069 *
1070 IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
1071 *
1072 * Sufficient workspace for a fast algorithm
1073 *
1074 IU = 1
1075 IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1076 *
1077 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1078 *
1079 LDWRKU = LDA
1080 IR = IU + LDWRKU*N
1081 LDWRKR = LDA
1082 ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1083 *
1084 * WORK(IU) is LDA by N and WORK(IR) is N by N
1085 *
1086 LDWRKU = LDA
1087 IR = IU + LDWRKU*N
1088 LDWRKR = N
1089 ELSE
1090 *
1091 * WORK(IU) is N by N and WORK(IR) is N by N
1092 *
1093 LDWRKU = N
1094 IR = IU + LDWRKU*N
1095 LDWRKR = N
1096 END IF
1097 ITAU = IR + LDWRKR*N
1098 IWORK = ITAU + N
1099 *
1100 * Compute A=Q*R
1101 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1102 *
1103 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1104 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1105 *
1106 * Copy R to WORK(IU), zeroing out below it
1107 *
1108 CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1109 $ LDWRKU )
1110 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1111 $ WORK( IU+1 ), LDWRKU )
1112 *
1113 * Generate Q in A
1114 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1115 *
1116 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1117 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1118 IE = ITAU
1119 ITAUQ = IE + N
1120 ITAUP = ITAUQ + N
1121 IWORK = ITAUP + N
1122 *
1123 * Bidiagonalize R in WORK(IU), copying result to
1124 * WORK(IR)
1125 * (Workspace: need 2*N*N+4*N,
1126 * prefer 2*N*N+3*N+2*N*NB)
1127 *
1128 CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1129 $ WORK( IE ), WORK( ITAUQ ),
1130 $ WORK( ITAUP ), WORK( IWORK ),
1131 $ LWORK-IWORK+1, IERR )
1132 CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1133 $ WORK( IR ), LDWRKR )
1134 *
1135 * Generate left bidiagonalizing vectors in WORK(IU)
1136 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1137 *
1138 CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1139 $ WORK( ITAUQ ), WORK( IWORK ),
1140 $ LWORK-IWORK+1, IERR )
1141 *
1142 * Generate right bidiagonalizing vectors in WORK(IR)
1143 * (Workspace: need 2*N*N+4*N-1,
1144 * prefer 2*N*N+3*N+(N-1)*NB)
1145 *
1146 CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1147 $ WORK( ITAUP ), WORK( IWORK ),
1148 $ LWORK-IWORK+1, IERR )
1149 IWORK = IE + N
1150 *
1151 * Perform bidiagonal QR iteration, computing left
1152 * singular vectors of R in WORK(IU) and computing
1153 * right singular vectors of R in WORK(IR)
1154 * (Workspace: need 2*N*N+BDSPAC)
1155 *
1156 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1157 $ WORK( IR ), LDWRKR, WORK( IU ),
1158 $ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1159 *
1160 * Multiply Q in A by left singular vectors of R in
1161 * WORK(IU), storing result in U
1162 * (Workspace: need N*N)
1163 *
1164 CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1165 $ WORK( IU ), LDWRKU, ZERO, U, LDU )
1166 *
1167 * Copy right singular vectors of R to A
1168 * (Workspace: need N*N)
1169 *
1170 CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1171 $ LDA )
1172 *
1173 ELSE
1174 *
1175 * Insufficient workspace for a fast algorithm
1176 *
1177 ITAU = 1
1178 IWORK = ITAU + N
1179 *
1180 * Compute A=Q*R, copying result to U
1181 * (Workspace: need 2*N, prefer N+N*NB)
1182 *
1183 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1184 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1185 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1186 *
1187 * Generate Q in U
1188 * (Workspace: need 2*N, prefer N+N*NB)
1189 *
1190 CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1191 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1192 IE = ITAU
1193 ITAUQ = IE + N
1194 ITAUP = ITAUQ + N
1195 IWORK = ITAUP + N
1196 *
1197 * Zero out below R in A
1198 *
1199 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1200 $ LDA )
1201 *
1202 * Bidiagonalize R in A
1203 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1204 *
1205 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1206 $ WORK( ITAUQ ), WORK( ITAUP ),
1207 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1208 *
1209 * Multiply Q in U by left vectors bidiagonalizing R
1210 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1211 *
1212 CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1213 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1214 $ LWORK-IWORK+1, IERR )
1215 *
1216 * Generate right vectors bidiagonalizing R in A
1217 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1218 *
1219 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1220 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1221 IWORK = IE + N
1222 *
1223 * Perform bidiagonal QR iteration, computing left
1224 * singular vectors of A in U and computing right
1225 * singular vectors of A in A
1226 * (Workspace: need BDSPAC)
1227 *
1228 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1229 $ LDA, U, LDU, DUM, 1, WORK( IWORK ),
1230 $ INFO )
1231 *
1232 END IF
1233 *
1234 ELSE IF( WNTVAS ) THEN
1235 *
1236 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1237 * or 'A')
1238 * N left singular vectors to be computed in U and
1239 * N right singular vectors to be computed in VT
1240 *
1241 IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1242 *
1243 * Sufficient workspace for a fast algorithm
1244 *
1245 IU = 1
1246 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1247 *
1248 * WORK(IU) is LDA by N
1249 *
1250 LDWRKU = LDA
1251 ELSE
1252 *
1253 * WORK(IU) is N by N
1254 *
1255 LDWRKU = N
1256 END IF
1257 ITAU = IU + LDWRKU*N
1258 IWORK = ITAU + N
1259 *
1260 * Compute A=Q*R
1261 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1262 *
1263 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1264 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1265 *
1266 * Copy R to WORK(IU), zeroing out below it
1267 *
1268 CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1269 $ LDWRKU )
1270 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1271 $ WORK( IU+1 ), LDWRKU )
1272 *
1273 * Generate Q in A
1274 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1275 *
1276 CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1277 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1278 IE = ITAU
1279 ITAUQ = IE + N
1280 ITAUP = ITAUQ + N
1281 IWORK = ITAUP + N
1282 *
1283 * Bidiagonalize R in WORK(IU), copying result to VT
1284 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1285 *
1286 CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1287 $ WORK( IE ), WORK( ITAUQ ),
1288 $ WORK( ITAUP ), WORK( IWORK ),
1289 $ LWORK-IWORK+1, IERR )
1290 CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1291 $ LDVT )
1292 *
1293 * Generate left bidiagonalizing vectors in WORK(IU)
1294 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1295 *
1296 CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1297 $ WORK( ITAUQ ), WORK( IWORK ),
1298 $ LWORK-IWORK+1, IERR )
1299 *
1300 * Generate right bidiagonalizing vectors in VT
1301 * (Workspace: need N*N+4*N-1,
1302 * prefer N*N+3*N+(N-1)*NB)
1303 *
1304 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1305 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1306 IWORK = IE + N
1307 *
1308 * Perform bidiagonal QR iteration, computing left
1309 * singular vectors of R in WORK(IU) and computing
1310 * right singular vectors of R in VT
1311 * (Workspace: need N*N+BDSPAC)
1312 *
1313 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1314 $ LDVT, WORK( IU ), LDWRKU, DUM, 1,
1315 $ WORK( IWORK ), INFO )
1316 *
1317 * Multiply Q in A by left singular vectors of R in
1318 * WORK(IU), storing result in U
1319 * (Workspace: need N*N)
1320 *
1321 CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1322 $ WORK( IU ), LDWRKU, ZERO, U, LDU )
1323 *
1324 ELSE
1325 *
1326 * Insufficient workspace for a fast algorithm
1327 *
1328 ITAU = 1
1329 IWORK = ITAU + N
1330 *
1331 * Compute A=Q*R, copying result to U
1332 * (Workspace: need 2*N, prefer N+N*NB)
1333 *
1334 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1335 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1336 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1337 *
1338 * Generate Q in U
1339 * (Workspace: need 2*N, prefer N+N*NB)
1340 *
1341 CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1342 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1343 *
1344 * Copy R to VT, zeroing out below it
1345 *
1346 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1347 IF( N.GT.1 )
1348 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1349 $ VT( 2, 1 ), LDVT )
1350 IE = ITAU
1351 ITAUQ = IE + N
1352 ITAUP = ITAUQ + N
1353 IWORK = ITAUP + N
1354 *
1355 * Bidiagonalize R in VT
1356 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1357 *
1358 CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1359 $ WORK( ITAUQ ), WORK( ITAUP ),
1360 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1361 *
1362 * Multiply Q in U by left bidiagonalizing vectors
1363 * in VT
1364 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1365 *
1366 CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1367 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1368 $ LWORK-IWORK+1, IERR )
1369 *
1370 * Generate right bidiagonalizing vectors in VT
1371 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1372 *
1373 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1374 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1375 IWORK = IE + N
1376 *
1377 * Perform bidiagonal QR iteration, computing left
1378 * singular vectors of A in U and computing right
1379 * singular vectors of A in VT
1380 * (Workspace: need BDSPAC)
1381 *
1382 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1383 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1384 $ INFO )
1385 *
1386 END IF
1387 *
1388 END IF
1389 *
1390 ELSE IF( WNTUA ) THEN
1391 *
1392 IF( WNTVN ) THEN
1393 *
1394 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1395 * M left singular vectors to be computed in U and
1396 * no right singular vectors to be computed
1397 *
1398 IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1399 *
1400 * Sufficient workspace for a fast algorithm
1401 *
1402 IR = 1
1403 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1404 *
1405 * WORK(IR) is LDA by N
1406 *
1407 LDWRKR = LDA
1408 ELSE
1409 *
1410 * WORK(IR) is N by N
1411 *
1412 LDWRKR = N
1413 END IF
1414 ITAU = IR + LDWRKR*N
1415 IWORK = ITAU + N
1416 *
1417 * Compute A=Q*R, copying result to U
1418 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1419 *
1420 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1421 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1422 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1423 *
1424 * Copy R to WORK(IR), zeroing out below it
1425 *
1426 CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1427 $ LDWRKR )
1428 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1429 $ WORK( IR+1 ), LDWRKR )
1430 *
1431 * Generate Q in U
1432 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1433 *
1434 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1435 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1436 IE = ITAU
1437 ITAUQ = IE + N
1438 ITAUP = ITAUQ + N
1439 IWORK = ITAUP + N
1440 *
1441 * Bidiagonalize R in WORK(IR)
1442 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1443 *
1444 CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1445 $ WORK( IE ), WORK( ITAUQ ),
1446 $ WORK( ITAUP ), WORK( IWORK ),
1447 $ LWORK-IWORK+1, IERR )
1448 *
1449 * Generate left bidiagonalizing vectors in WORK(IR)
1450 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1451 *
1452 CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1453 $ WORK( ITAUQ ), WORK( IWORK ),
1454 $ LWORK-IWORK+1, IERR )
1455 IWORK = IE + N
1456 *
1457 * Perform bidiagonal QR iteration, computing left
1458 * singular vectors of R in WORK(IR)
1459 * (Workspace: need N*N+BDSPAC)
1460 *
1461 CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1462 $ 1, WORK( IR ), LDWRKR, DUM, 1,
1463 $ WORK( IWORK ), INFO )
1464 *
1465 * Multiply Q in U by left singular vectors of R in
1466 * WORK(IR), storing result in A
1467 * (Workspace: need N*N)
1468 *
1469 CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1470 $ WORK( IR ), LDWRKR, ZERO, A, LDA )
1471 *
1472 * Copy left singular vectors of A from A to U
1473 *
1474 CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1475 *
1476 ELSE
1477 *
1478 * Insufficient workspace for a fast algorithm
1479 *
1480 ITAU = 1
1481 IWORK = ITAU + N
1482 *
1483 * Compute A=Q*R, copying result to U
1484 * (Workspace: need 2*N, prefer N+N*NB)
1485 *
1486 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1487 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1488 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1489 *
1490 * Generate Q in U
1491 * (Workspace: need N+M, prefer N+M*NB)
1492 *
1493 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1494 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1495 IE = ITAU
1496 ITAUQ = IE + N
1497 ITAUP = ITAUQ + N
1498 IWORK = ITAUP + N
1499 *
1500 * Zero out below R in A
1501 *
1502 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1503 $ LDA )
1504 *
1505 * Bidiagonalize R in A
1506 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1507 *
1508 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1509 $ WORK( ITAUQ ), WORK( ITAUP ),
1510 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1511 *
1512 * Multiply Q in U by left bidiagonalizing vectors
1513 * in A
1514 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1515 *
1516 CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1517 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1518 $ LWORK-IWORK+1, IERR )
1519 IWORK = IE + N
1520 *
1521 * Perform bidiagonal QR iteration, computing left
1522 * singular vectors of A in U
1523 * (Workspace: need BDSPAC)
1524 *
1525 CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1526 $ 1, U, LDU, DUM, 1, WORK( IWORK ),
1527 $ INFO )
1528 *
1529 END IF
1530 *
1531 ELSE IF( WNTVO ) THEN
1532 *
1533 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1534 * M left singular vectors to be computed in U and
1535 * N right singular vectors to be overwritten on A
1536 *
1537 IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1538 *
1539 * Sufficient workspace for a fast algorithm
1540 *
1541 IU = 1
1542 IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1543 *
1544 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1545 *
1546 LDWRKU = LDA
1547 IR = IU + LDWRKU*N
1548 LDWRKR = LDA
1549 ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1550 *
1551 * WORK(IU) is LDA by N and WORK(IR) is N by N
1552 *
1553 LDWRKU = LDA
1554 IR = IU + LDWRKU*N
1555 LDWRKR = N
1556 ELSE
1557 *
1558 * WORK(IU) is N by N and WORK(IR) is N by N
1559 *
1560 LDWRKU = N
1561 IR = IU + LDWRKU*N
1562 LDWRKR = N
1563 END IF
1564 ITAU = IR + LDWRKR*N
1565 IWORK = ITAU + N
1566 *
1567 * Compute A=Q*R, copying result to U
1568 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1569 *
1570 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1571 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1572 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1573 *
1574 * Generate Q in U
1575 * (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1576 *
1577 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1578 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1579 *
1580 * Copy R to WORK(IU), zeroing out below it
1581 *
1582 CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1583 $ LDWRKU )
1584 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1585 $ WORK( IU+1 ), LDWRKU )
1586 IE = ITAU
1587 ITAUQ = IE + N
1588 ITAUP = ITAUQ + N
1589 IWORK = ITAUP + N
1590 *
1591 * Bidiagonalize R in WORK(IU), copying result to
1592 * WORK(IR)
1593 * (Workspace: need 2*N*N+4*N,
1594 * prefer 2*N*N+3*N+2*N*NB)
1595 *
1596 CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1597 $ WORK( IE ), WORK( ITAUQ ),
1598 $ WORK( ITAUP ), WORK( IWORK ),
1599 $ LWORK-IWORK+1, IERR )
1600 CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1601 $ WORK( IR ), LDWRKR )
1602 *
1603 * Generate left bidiagonalizing vectors in WORK(IU)
1604 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1605 *
1606 CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1607 $ WORK( ITAUQ ), WORK( IWORK ),
1608 $ LWORK-IWORK+1, IERR )
1609 *
1610 * Generate right bidiagonalizing vectors in WORK(IR)
1611 * (Workspace: need 2*N*N+4*N-1,
1612 * prefer 2*N*N+3*N+(N-1)*NB)
1613 *
1614 CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1615 $ WORK( ITAUP ), WORK( IWORK ),
1616 $ LWORK-IWORK+1, IERR )
1617 IWORK = IE + N
1618 *
1619 * Perform bidiagonal QR iteration, computing left
1620 * singular vectors of R in WORK(IU) and computing
1621 * right singular vectors of R in WORK(IR)
1622 * (Workspace: need 2*N*N+BDSPAC)
1623 *
1624 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1625 $ WORK( IR ), LDWRKR, WORK( IU ),
1626 $ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1627 *
1628 * Multiply Q in U by left singular vectors of R in
1629 * WORK(IU), storing result in A
1630 * (Workspace: need N*N)
1631 *
1632 CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1633 $ WORK( IU ), LDWRKU, ZERO, A, LDA )
1634 *
1635 * Copy left singular vectors of A from A to U
1636 *
1637 CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1638 *
1639 * Copy right singular vectors of R from WORK(IR) to A
1640 *
1641 CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1642 $ LDA )
1643 *
1644 ELSE
1645 *
1646 * Insufficient workspace for a fast algorithm
1647 *
1648 ITAU = 1
1649 IWORK = ITAU + N
1650 *
1651 * Compute A=Q*R, copying result to U
1652 * (Workspace: need 2*N, prefer N+N*NB)
1653 *
1654 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1655 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1656 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1657 *
1658 * Generate Q in U
1659 * (Workspace: need N+M, prefer N+M*NB)
1660 *
1661 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1662 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1663 IE = ITAU
1664 ITAUQ = IE + N
1665 ITAUP = ITAUQ + N
1666 IWORK = ITAUP + N
1667 *
1668 * Zero out below R in A
1669 *
1670 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1671 $ LDA )
1672 *
1673 * Bidiagonalize R in A
1674 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1675 *
1676 CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1677 $ WORK( ITAUQ ), WORK( ITAUP ),
1678 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1679 *
1680 * Multiply Q in U by left bidiagonalizing vectors
1681 * in A
1682 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1683 *
1684 CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1685 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1686 $ LWORK-IWORK+1, IERR )
1687 *
1688 * Generate right bidiagonalizing vectors in A
1689 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1690 *
1691 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1692 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1693 IWORK = IE + N
1694 *
1695 * Perform bidiagonal QR iteration, computing left
1696 * singular vectors of A in U and computing right
1697 * singular vectors of A in A
1698 * (Workspace: need BDSPAC)
1699 *
1700 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1701 $ LDA, U, LDU, DUM, 1, WORK( IWORK ),
1702 $ INFO )
1703 *
1704 END IF
1705 *
1706 ELSE IF( WNTVAS ) THEN
1707 *
1708 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1709 * or 'A')
1710 * M left singular vectors to be computed in U and
1711 * N right singular vectors to be computed in VT
1712 *
1713 IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1714 *
1715 * Sufficient workspace for a fast algorithm
1716 *
1717 IU = 1
1718 IF( LWORK.GE.WRKBL+LDA*N ) THEN
1719 *
1720 * WORK(IU) is LDA by N
1721 *
1722 LDWRKU = LDA
1723 ELSE
1724 *
1725 * WORK(IU) is N by N
1726 *
1727 LDWRKU = N
1728 END IF
1729 ITAU = IU + LDWRKU*N
1730 IWORK = ITAU + N
1731 *
1732 * Compute A=Q*R, copying result to U
1733 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1734 *
1735 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1736 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1737 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1738 *
1739 * Generate Q in U
1740 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1741 *
1742 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1743 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1744 *
1745 * Copy R to WORK(IU), zeroing out below it
1746 *
1747 CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1748 $ LDWRKU )
1749 CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1750 $ WORK( IU+1 ), LDWRKU )
1751 IE = ITAU
1752 ITAUQ = IE + N
1753 ITAUP = ITAUQ + N
1754 IWORK = ITAUP + N
1755 *
1756 * Bidiagonalize R in WORK(IU), copying result to VT
1757 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1758 *
1759 CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1760 $ WORK( IE ), WORK( ITAUQ ),
1761 $ WORK( ITAUP ), WORK( IWORK ),
1762 $ LWORK-IWORK+1, IERR )
1763 CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1764 $ LDVT )
1765 *
1766 * Generate left bidiagonalizing vectors in WORK(IU)
1767 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1768 *
1769 CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1770 $ WORK( ITAUQ ), WORK( IWORK ),
1771 $ LWORK-IWORK+1, IERR )
1772 *
1773 * Generate right bidiagonalizing vectors in VT
1774 * (Workspace: need N*N+4*N-1,
1775 * prefer N*N+3*N+(N-1)*NB)
1776 *
1777 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1778 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1779 IWORK = IE + N
1780 *
1781 * Perform bidiagonal QR iteration, computing left
1782 * singular vectors of R in WORK(IU) and computing
1783 * right singular vectors of R in VT
1784 * (Workspace: need N*N+BDSPAC)
1785 *
1786 CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1787 $ LDVT, WORK( IU ), LDWRKU, DUM, 1,
1788 $ WORK( IWORK ), INFO )
1789 *
1790 * Multiply Q in U by left singular vectors of R in
1791 * WORK(IU), storing result in A
1792 * (Workspace: need N*N)
1793 *
1794 CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1795 $ WORK( IU ), LDWRKU, ZERO, A, LDA )
1796 *
1797 * Copy left singular vectors of A from A to U
1798 *
1799 CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1800 *
1801 ELSE
1802 *
1803 * Insufficient workspace for a fast algorithm
1804 *
1805 ITAU = 1
1806 IWORK = ITAU + N
1807 *
1808 * Compute A=Q*R, copying result to U
1809 * (Workspace: need 2*N, prefer N+N*NB)
1810 *
1811 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1812 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1813 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1814 *
1815 * Generate Q in U
1816 * (Workspace: need N+M, prefer N+M*NB)
1817 *
1818 CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1819 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1820 *
1821 * Copy R from A to VT, zeroing out below it
1822 *
1823 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1824 IF( N.GT.1 )
1825 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1826 $ VT( 2, 1 ), LDVT )
1827 IE = ITAU
1828 ITAUQ = IE + N
1829 ITAUP = ITAUQ + N
1830 IWORK = ITAUP + N
1831 *
1832 * Bidiagonalize R in VT
1833 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1834 *
1835 CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1836 $ WORK( ITAUQ ), WORK( ITAUP ),
1837 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1838 *
1839 * Multiply Q in U by left bidiagonalizing vectors
1840 * in VT
1841 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1842 *
1843 CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1844 $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1845 $ LWORK-IWORK+1, IERR )
1846 *
1847 * Generate right bidiagonalizing vectors in VT
1848 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1849 *
1850 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1851 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1852 IWORK = IE + N
1853 *
1854 * Perform bidiagonal QR iteration, computing left
1855 * singular vectors of A in U and computing right
1856 * singular vectors of A in VT
1857 * (Workspace: need BDSPAC)
1858 *
1859 CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1860 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1861 $ INFO )
1862 *
1863 END IF
1864 *
1865 END IF
1866 *
1867 END IF
1868 *
1869 ELSE
1870 *
1871 * M .LT. MNTHR
1872 *
1873 * Path 10 (M at least N, but not much larger)
1874 * Reduce to bidiagonal form without QR decomposition
1875 *
1876 IE = 1
1877 ITAUQ = IE + N
1878 ITAUP = ITAUQ + N
1879 IWORK = ITAUP + N
1880 *
1881 * Bidiagonalize A
1882 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1883 *
1884 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1885 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1886 $ IERR )
1887 IF( WNTUAS ) THEN
1888 *
1889 * If left singular vectors desired in U, copy result to U
1890 * and generate left bidiagonalizing vectors in U
1891 * (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1892 *
1893 CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1894 IF( WNTUS )
1895 $ NCU = N
1896 IF( WNTUA )
1897 $ NCU = M
1898 CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1899 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1900 END IF
1901 IF( WNTVAS ) THEN
1902 *
1903 * If right singular vectors desired in VT, copy result to
1904 * VT and generate right bidiagonalizing vectors in VT
1905 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1906 *
1907 CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1908 CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1909 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1910 END IF
1911 IF( WNTUO ) THEN
1912 *
1913 * If left singular vectors desired in A, generate left
1914 * bidiagonalizing vectors in A
1915 * (Workspace: need 4*N, prefer 3*N+N*NB)
1916 *
1917 CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
1918 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1919 END IF
1920 IF( WNTVO ) THEN
1921 *
1922 * If right singular vectors desired in A, generate right
1923 * bidiagonalizing vectors in A
1924 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1925 *
1926 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1927 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
1928 END IF
1929 IWORK = IE + N
1930 IF( WNTUAS .OR. WNTUO )
1931 $ NRU = M
1932 IF( WNTUN )
1933 $ NRU = 0
1934 IF( WNTVAS .OR. WNTVO )
1935 $ NCVT = N
1936 IF( WNTVN )
1937 $ NCVT = 0
1938 IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
1939 *
1940 * Perform bidiagonal QR iteration, if desired, computing
1941 * left singular vectors in U and computing right singular
1942 * vectors in VT
1943 * (Workspace: need BDSPAC)
1944 *
1945 CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1946 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
1947 ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
1948 *
1949 * Perform bidiagonal QR iteration, if desired, computing
1950 * left singular vectors in U and computing right singular
1951 * vectors in A
1952 * (Workspace: need BDSPAC)
1953 *
1954 CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
1955 $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
1956 ELSE
1957 *
1958 * Perform bidiagonal QR iteration, if desired, computing
1959 * left singular vectors in A and computing right singular
1960 * vectors in VT
1961 * (Workspace: need BDSPAC)
1962 *
1963 CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1964 $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
1965 END IF
1966 *
1967 END IF
1968 *
1969 ELSE
1970 *
1971 * A has more columns than rows. If A has sufficiently more
1972 * columns than rows, first reduce using the LQ decomposition (if
1973 * sufficient workspace available)
1974 *
1975 IF( N.GE.MNTHR ) THEN
1976 *
1977 IF( WNTVN ) THEN
1978 *
1979 * Path 1t(N much larger than M, JOBVT='N')
1980 * No right singular vectors to be computed
1981 *
1982 ITAU = 1
1983 IWORK = ITAU + M
1984 *
1985 * Compute A=L*Q
1986 * (Workspace: need 2*M, prefer M+M*NB)
1987 *
1988 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
1989 $ LWORK-IWORK+1, IERR )
1990 *
1991 * Zero out above L
1992 *
1993 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
1994 IE = 1
1995 ITAUQ = IE + M
1996 ITAUP = ITAUQ + M
1997 IWORK = ITAUP + M
1998 *
1999 * Bidiagonalize L in A
2000 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2001 *
2002 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
2003 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2004 $ IERR )
2005 IF( WNTUO .OR. WNTUAS ) THEN
2006 *
2007 * If left singular vectors desired, generate Q
2008 * (Workspace: need 4*M, prefer 3*M+M*NB)
2009 *
2010 CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2011 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2012 END IF
2013 IWORK = IE + M
2014 NRU = 0
2015 IF( WNTUO .OR. WNTUAS )
2016 $ NRU = M
2017 *
2018 * Perform bidiagonal QR iteration, computing left singular
2019 * vectors of A in A if desired
2020 * (Workspace: need BDSPAC)
2021 *
2022 CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2023 $ LDA, DUM, 1, WORK( IWORK ), INFO )
2024 *
2025 * If left singular vectors desired in U, copy them there
2026 *
2027 IF( WNTUAS )
2028 $ CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
2029 *
2030 ELSE IF( WNTVO .AND. WNTUN ) THEN
2031 *
2032 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2033 * M right singular vectors to be overwritten on A and
2034 * no left singular vectors to be computed
2035 *
2036 IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2037 *
2038 * Sufficient workspace for a fast algorithm
2039 *
2040 IR = 1
2041 IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2042 *
2043 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2044 *
2045 LDWRKU = LDA
2046 CHUNK = N
2047 LDWRKR = LDA
2048 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2049 *
2050 * WORK(IU) is LDA by N and WORK(IR) is M by M
2051 *
2052 LDWRKU = LDA
2053 CHUNK = N
2054 LDWRKR = M
2055 ELSE
2056 *
2057 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2058 *
2059 LDWRKU = M
2060 CHUNK = ( LWORK-M*M-M ) / M
2061 LDWRKR = M
2062 END IF
2063 ITAU = IR + LDWRKR*M
2064 IWORK = ITAU + M
2065 *
2066 * Compute A=L*Q
2067 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2068 *
2069 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2070 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2071 *
2072 * Copy L to WORK(IR) and zero out above it
2073 *
2074 CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2075 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2076 $ WORK( IR+LDWRKR ), LDWRKR )
2077 *
2078 * Generate Q in A
2079 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2080 *
2081 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2082 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2083 IE = ITAU
2084 ITAUQ = IE + M
2085 ITAUP = ITAUQ + M
2086 IWORK = ITAUP + M
2087 *
2088 * Bidiagonalize L in WORK(IR)
2089 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2090 *
2091 CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2092 $ WORK( ITAUQ ), WORK( ITAUP ),
2093 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2094 *
2095 * Generate right vectors bidiagonalizing L
2096 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2097 *
2098 CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2099 $ WORK( ITAUP ), WORK( IWORK ),
2100 $ LWORK-IWORK+1, IERR )
2101 IWORK = IE + M
2102 *
2103 * Perform bidiagonal QR iteration, computing right
2104 * singular vectors of L in WORK(IR)
2105 * (Workspace: need M*M+BDSPAC)
2106 *
2107 CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2108 $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2109 $ WORK( IWORK ), INFO )
2110 IU = IE + M
2111 *
2112 * Multiply right singular vectors of L in WORK(IR) by Q
2113 * in A, storing result in WORK(IU) and copying to A
2114 * (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2115 *
2116 DO 30 I = 1, N, CHUNK
2117 BLK = MIN( N-I+1, CHUNK )
2118 CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2119 $ LDWRKR, A( 1, I ), LDA, ZERO,
2120 $ WORK( IU ), LDWRKU )
2121 CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2122 $ A( 1, I ), LDA )
2123 30 CONTINUE
2124 *
2125 ELSE
2126 *
2127 * Insufficient workspace for a fast algorithm
2128 *
2129 IE = 1
2130 ITAUQ = IE + M
2131 ITAUP = ITAUQ + M
2132 IWORK = ITAUP + M
2133 *
2134 * Bidiagonalize A
2135 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2136 *
2137 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
2138 $ WORK( ITAUQ ), WORK( ITAUP ),
2139 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2140 *
2141 * Generate right vectors bidiagonalizing A
2142 * (Workspace: need 4*M, prefer 3*M+M*NB)
2143 *
2144 CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2145 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2146 IWORK = IE + M
2147 *
2148 * Perform bidiagonal QR iteration, computing right
2149 * singular vectors of A in A
2150 * (Workspace: need BDSPAC)
2151 *
2152 CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
2153 $ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2154 *
2155 END IF
2156 *
2157 ELSE IF( WNTVO .AND. WNTUAS ) THEN
2158 *
2159 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2160 * M right singular vectors to be overwritten on A and
2161 * M left singular vectors to be computed in U
2162 *
2163 IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2164 *
2165 * Sufficient workspace for a fast algorithm
2166 *
2167 IR = 1
2168 IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2169 *
2170 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2171 *
2172 LDWRKU = LDA
2173 CHUNK = N
2174 LDWRKR = LDA
2175 ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2176 *
2177 * WORK(IU) is LDA by N and WORK(IR) is M by M
2178 *
2179 LDWRKU = LDA
2180 CHUNK = N
2181 LDWRKR = M
2182 ELSE
2183 *
2184 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2185 *
2186 LDWRKU = M
2187 CHUNK = ( LWORK-M*M-M ) / M
2188 LDWRKR = M
2189 END IF
2190 ITAU = IR + LDWRKR*M
2191 IWORK = ITAU + M
2192 *
2193 * Compute A=L*Q
2194 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2195 *
2196 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2197 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2198 *
2199 * Copy L to U, zeroing about above it
2200 *
2201 CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2202 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2203 $ LDU )
2204 *
2205 * Generate Q in A
2206 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2207 *
2208 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2209 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2210 IE = ITAU
2211 ITAUQ = IE + M
2212 ITAUP = ITAUQ + M
2213 IWORK = ITAUP + M
2214 *
2215 * Bidiagonalize L in U, copying result to WORK(IR)
2216 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2217 *
2218 CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2219 $ WORK( ITAUQ ), WORK( ITAUP ),
2220 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2221 CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2222 *
2223 * Generate right vectors bidiagonalizing L in WORK(IR)
2224 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2225 *
2226 CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2227 $ WORK( ITAUP ), WORK( IWORK ),
2228 $ LWORK-IWORK+1, IERR )
2229 *
2230 * Generate left vectors bidiagonalizing L in U
2231 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2232 *
2233 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2234 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2235 IWORK = IE + M
2236 *
2237 * Perform bidiagonal QR iteration, computing left
2238 * singular vectors of L in U, and computing right
2239 * singular vectors of L in WORK(IR)
2240 * (Workspace: need M*M+BDSPAC)
2241 *
2242 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2243 $ WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2244 $ WORK( IWORK ), INFO )
2245 IU = IE + M
2246 *
2247 * Multiply right singular vectors of L in WORK(IR) by Q
2248 * in A, storing result in WORK(IU) and copying to A
2249 * (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2250 *
2251 DO 40 I = 1, N, CHUNK
2252 BLK = MIN( N-I+1, CHUNK )
2253 CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2254 $ LDWRKR, A( 1, I ), LDA, ZERO,
2255 $ WORK( IU ), LDWRKU )
2256 CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2257 $ A( 1, I ), LDA )
2258 40 CONTINUE
2259 *
2260 ELSE
2261 *
2262 * Insufficient workspace for a fast algorithm
2263 *
2264 ITAU = 1
2265 IWORK = ITAU + M
2266 *
2267 * Compute A=L*Q
2268 * (Workspace: need 2*M, prefer M+M*NB)
2269 *
2270 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2271 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2272 *
2273 * Copy L to U, zeroing out above it
2274 *
2275 CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2276 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2277 $ LDU )
2278 *
2279 * Generate Q in A
2280 * (Workspace: need 2*M, prefer M+M*NB)
2281 *
2282 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2283 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2284 IE = ITAU
2285 ITAUQ = IE + M
2286 ITAUP = ITAUQ + M
2287 IWORK = ITAUP + M
2288 *
2289 * Bidiagonalize L in U
2290 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2291 *
2292 CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2293 $ WORK( ITAUQ ), WORK( ITAUP ),
2294 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2295 *
2296 * Multiply right vectors bidiagonalizing L by Q in A
2297 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2298 *
2299 CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2300 $ WORK( ITAUP ), A, LDA, WORK( IWORK ),
2301 $ LWORK-IWORK+1, IERR )
2302 *
2303 * Generate left vectors bidiagonalizing L in U
2304 * (Workspace: need 4*M, prefer 3*M+M*NB)
2305 *
2306 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2307 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2308 IWORK = IE + M
2309 *
2310 * Perform bidiagonal QR iteration, computing left
2311 * singular vectors of A in U and computing right
2312 * singular vectors of A in A
2313 * (Workspace: need BDSPAC)
2314 *
2315 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2316 $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
2317 *
2318 END IF
2319 *
2320 ELSE IF( WNTVS ) THEN
2321 *
2322 IF( WNTUN ) THEN
2323 *
2324 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2325 * M right singular vectors to be computed in VT and
2326 * no left singular vectors to be computed
2327 *
2328 IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2329 *
2330 * Sufficient workspace for a fast algorithm
2331 *
2332 IR = 1
2333 IF( LWORK.GE.WRKBL+LDA*M ) THEN
2334 *
2335 * WORK(IR) is LDA by M
2336 *
2337 LDWRKR = LDA
2338 ELSE
2339 *
2340 * WORK(IR) is M by M
2341 *
2342 LDWRKR = M
2343 END IF
2344 ITAU = IR + LDWRKR*M
2345 IWORK = ITAU + M
2346 *
2347 * Compute A=L*Q
2348 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2349 *
2350 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2351 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2352 *
2353 * Copy L to WORK(IR), zeroing out above it
2354 *
2355 CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2356 $ LDWRKR )
2357 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2358 $ WORK( IR+LDWRKR ), LDWRKR )
2359 *
2360 * Generate Q in A
2361 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2362 *
2363 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2364 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2365 IE = ITAU
2366 ITAUQ = IE + M
2367 ITAUP = ITAUQ + M
2368 IWORK = ITAUP + M
2369 *
2370 * Bidiagonalize L in WORK(IR)
2371 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2372 *
2373 CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2374 $ WORK( IE ), WORK( ITAUQ ),
2375 $ WORK( ITAUP ), WORK( IWORK ),
2376 $ LWORK-IWORK+1, IERR )
2377 *
2378 * Generate right vectors bidiagonalizing L in
2379 * WORK(IR)
2380 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2381 *
2382 CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2383 $ WORK( ITAUP ), WORK( IWORK ),
2384 $ LWORK-IWORK+1, IERR )
2385 IWORK = IE + M
2386 *
2387 * Perform bidiagonal QR iteration, computing right
2388 * singular vectors of L in WORK(IR)
2389 * (Workspace: need M*M+BDSPAC)
2390 *
2391 CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2392 $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2393 $ WORK( IWORK ), INFO )
2394 *
2395 * Multiply right singular vectors of L in WORK(IR) by
2396 * Q in A, storing result in VT
2397 * (Workspace: need M*M)
2398 *
2399 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2400 $ LDWRKR, A, LDA, ZERO, VT, LDVT )
2401 *
2402 ELSE
2403 *
2404 * Insufficient workspace for a fast algorithm
2405 *
2406 ITAU = 1
2407 IWORK = ITAU + M
2408 *
2409 * Compute A=L*Q
2410 * (Workspace: need 2*M, prefer M+M*NB)
2411 *
2412 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2413 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2414 *
2415 * Copy result to VT
2416 *
2417 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2418 *
2419 * Generate Q in VT
2420 * (Workspace: need 2*M, prefer M+M*NB)
2421 *
2422 CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2423 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2424 IE = ITAU
2425 ITAUQ = IE + M
2426 ITAUP = ITAUQ + M
2427 IWORK = ITAUP + M
2428 *
2429 * Zero out above L in A
2430 *
2431 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2432 $ LDA )
2433 *
2434 * Bidiagonalize L in A
2435 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2436 *
2437 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2438 $ WORK( ITAUQ ), WORK( ITAUP ),
2439 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2440 *
2441 * Multiply right vectors bidiagonalizing L by Q in VT
2442 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2443 *
2444 CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2445 $ WORK( ITAUP ), VT, LDVT,
2446 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2447 IWORK = IE + M
2448 *
2449 * Perform bidiagonal QR iteration, computing right
2450 * singular vectors of A in VT
2451 * (Workspace: need BDSPAC)
2452 *
2453 CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2454 $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2455 $ INFO )
2456 *
2457 END IF
2458 *
2459 ELSE IF( WNTUO ) THEN
2460 *
2461 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2462 * M right singular vectors to be computed in VT and
2463 * M left singular vectors to be overwritten on A
2464 *
2465 IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
2466 *
2467 * Sufficient workspace for a fast algorithm
2468 *
2469 IU = 1
2470 IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2471 *
2472 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2473 *
2474 LDWRKU = LDA
2475 IR = IU + LDWRKU*M
2476 LDWRKR = LDA
2477 ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2478 *
2479 * WORK(IU) is LDA by M and WORK(IR) is M by M
2480 *
2481 LDWRKU = LDA
2482 IR = IU + LDWRKU*M
2483 LDWRKR = M
2484 ELSE
2485 *
2486 * WORK(IU) is M by M and WORK(IR) is M by M
2487 *
2488 LDWRKU = M
2489 IR = IU + LDWRKU*M
2490 LDWRKR = M
2491 END IF
2492 ITAU = IR + LDWRKR*M
2493 IWORK = ITAU + M
2494 *
2495 * Compute A=L*Q
2496 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2497 *
2498 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2499 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2500 *
2501 * Copy L to WORK(IU), zeroing out below it
2502 *
2503 CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2504 $ LDWRKU )
2505 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2506 $ WORK( IU+LDWRKU ), LDWRKU )
2507 *
2508 * Generate Q in A
2509 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2510 *
2511 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2512 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2513 IE = ITAU
2514 ITAUQ = IE + M
2515 ITAUP = ITAUQ + M
2516 IWORK = ITAUP + M
2517 *
2518 * Bidiagonalize L in WORK(IU), copying result to
2519 * WORK(IR)
2520 * (Workspace: need 2*M*M+4*M,
2521 * prefer 2*M*M+3*M+2*M*NB)
2522 *
2523 CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2524 $ WORK( IE ), WORK( ITAUQ ),
2525 $ WORK( ITAUP ), WORK( IWORK ),
2526 $ LWORK-IWORK+1, IERR )
2527 CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2528 $ WORK( IR ), LDWRKR )
2529 *
2530 * Generate right bidiagonalizing vectors in WORK(IU)
2531 * (Workspace: need 2*M*M+4*M-1,
2532 * prefer 2*M*M+3*M+(M-1)*NB)
2533 *
2534 CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2535 $ WORK( ITAUP ), WORK( IWORK ),
2536 $ LWORK-IWORK+1, IERR )
2537 *
2538 * Generate left bidiagonalizing vectors in WORK(IR)
2539 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2540 *
2541 CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2542 $ WORK( ITAUQ ), WORK( IWORK ),
2543 $ LWORK-IWORK+1, IERR )
2544 IWORK = IE + M
2545 *
2546 * Perform bidiagonal QR iteration, computing left
2547 * singular vectors of L in WORK(IR) and computing
2548 * right singular vectors of L in WORK(IU)
2549 * (Workspace: need 2*M*M+BDSPAC)
2550 *
2551 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2552 $ WORK( IU ), LDWRKU, WORK( IR ),
2553 $ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2554 *
2555 * Multiply right singular vectors of L in WORK(IU) by
2556 * Q in A, storing result in VT
2557 * (Workspace: need M*M)
2558 *
2559 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2560 $ LDWRKU, A, LDA, ZERO, VT, LDVT )
2561 *
2562 * Copy left singular vectors of L to A
2563 * (Workspace: need M*M)
2564 *
2565 CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2566 $ LDA )
2567 *
2568 ELSE
2569 *
2570 * Insufficient workspace for a fast algorithm
2571 *
2572 ITAU = 1
2573 IWORK = ITAU + M
2574 *
2575 * Compute A=L*Q, copying result to VT
2576 * (Workspace: need 2*M, prefer M+M*NB)
2577 *
2578 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2579 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2580 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2581 *
2582 * Generate Q in VT
2583 * (Workspace: need 2*M, prefer M+M*NB)
2584 *
2585 CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2586 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2587 IE = ITAU
2588 ITAUQ = IE + M
2589 ITAUP = ITAUQ + M
2590 IWORK = ITAUP + M
2591 *
2592 * Zero out above L in A
2593 *
2594 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2595 $ LDA )
2596 *
2597 * Bidiagonalize L in A
2598 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2599 *
2600 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2601 $ WORK( ITAUQ ), WORK( ITAUP ),
2602 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2603 *
2604 * Multiply right vectors bidiagonalizing L by Q in VT
2605 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2606 *
2607 CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2608 $ WORK( ITAUP ), VT, LDVT,
2609 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2610 *
2611 * Generate left bidiagonalizing vectors of L in A
2612 * (Workspace: need 4*M, prefer 3*M+M*NB)
2613 *
2614 CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2615 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2616 IWORK = IE + M
2617 *
2618 * Perform bidiagonal QR iteration, compute left
2619 * singular vectors of A in A and compute right
2620 * singular vectors of A in VT
2621 * (Workspace: need BDSPAC)
2622 *
2623 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2624 $ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2625 $ INFO )
2626 *
2627 END IF
2628 *
2629 ELSE IF( WNTUAS ) THEN
2630 *
2631 * Path 6t(N much larger than M, JOBU='S' or 'A',
2632 * JOBVT='S')
2633 * M right singular vectors to be computed in VT and
2634 * M left singular vectors to be computed in U
2635 *
2636 IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2637 *
2638 * Sufficient workspace for a fast algorithm
2639 *
2640 IU = 1
2641 IF( LWORK.GE.WRKBL+LDA*M ) THEN
2642 *
2643 * WORK(IU) is LDA by N
2644 *
2645 LDWRKU = LDA
2646 ELSE
2647 *
2648 * WORK(IU) is LDA by M
2649 *
2650 LDWRKU = M
2651 END IF
2652 ITAU = IU + LDWRKU*M
2653 IWORK = ITAU + M
2654 *
2655 * Compute A=L*Q
2656 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2657 *
2658 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2659 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2660 *
2661 * Copy L to WORK(IU), zeroing out above it
2662 *
2663 CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2664 $ LDWRKU )
2665 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2666 $ WORK( IU+LDWRKU ), LDWRKU )
2667 *
2668 * Generate Q in A
2669 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2670 *
2671 CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2672 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2673 IE = ITAU
2674 ITAUQ = IE + M
2675 ITAUP = ITAUQ + M
2676 IWORK = ITAUP + M
2677 *
2678 * Bidiagonalize L in WORK(IU), copying result to U
2679 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2680 *
2681 CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2682 $ WORK( IE ), WORK( ITAUQ ),
2683 $ WORK( ITAUP ), WORK( IWORK ),
2684 $ LWORK-IWORK+1, IERR )
2685 CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2686 $ LDU )
2687 *
2688 * Generate right bidiagonalizing vectors in WORK(IU)
2689 * (Workspace: need M*M+4*M-1,
2690 * prefer M*M+3*M+(M-1)*NB)
2691 *
2692 CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2693 $ WORK( ITAUP ), WORK( IWORK ),
2694 $ LWORK-IWORK+1, IERR )
2695 *
2696 * Generate left bidiagonalizing vectors in U
2697 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2698 *
2699 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2700 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2701 IWORK = IE + M
2702 *
2703 * Perform bidiagonal QR iteration, computing left
2704 * singular vectors of L in U and computing right
2705 * singular vectors of L in WORK(IU)
2706 * (Workspace: need M*M+BDSPAC)
2707 *
2708 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2709 $ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2710 $ WORK( IWORK ), INFO )
2711 *
2712 * Multiply right singular vectors of L in WORK(IU) by
2713 * Q in A, storing result in VT
2714 * (Workspace: need M*M)
2715 *
2716 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2717 $ LDWRKU, A, LDA, ZERO, VT, LDVT )
2718 *
2719 ELSE
2720 *
2721 * Insufficient workspace for a fast algorithm
2722 *
2723 ITAU = 1
2724 IWORK = ITAU + M
2725 *
2726 * Compute A=L*Q, copying result to VT
2727 * (Workspace: need 2*M, prefer M+M*NB)
2728 *
2729 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2730 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2731 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2732 *
2733 * Generate Q in VT
2734 * (Workspace: need 2*M, prefer M+M*NB)
2735 *
2736 CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2737 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2738 *
2739 * Copy L to U, zeroing out above it
2740 *
2741 CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2742 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2743 $ LDU )
2744 IE = ITAU
2745 ITAUQ = IE + M
2746 ITAUP = ITAUQ + M
2747 IWORK = ITAUP + M
2748 *
2749 * Bidiagonalize L in U
2750 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2751 *
2752 CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2753 $ WORK( ITAUQ ), WORK( ITAUP ),
2754 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2755 *
2756 * Multiply right bidiagonalizing vectors in U by Q
2757 * in VT
2758 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2759 *
2760 CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2761 $ WORK( ITAUP ), VT, LDVT,
2762 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2763 *
2764 * Generate left bidiagonalizing vectors in U
2765 * (Workspace: need 4*M, prefer 3*M+M*NB)
2766 *
2767 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2768 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2769 IWORK = IE + M
2770 *
2771 * Perform bidiagonal QR iteration, computing left
2772 * singular vectors of A in U and computing right
2773 * singular vectors of A in VT
2774 * (Workspace: need BDSPAC)
2775 *
2776 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2777 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2778 $ INFO )
2779 *
2780 END IF
2781 *
2782 END IF
2783 *
2784 ELSE IF( WNTVA ) THEN
2785 *
2786 IF( WNTUN ) THEN
2787 *
2788 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2789 * N right singular vectors to be computed in VT and
2790 * no left singular vectors to be computed
2791 *
2792 IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2793 *
2794 * Sufficient workspace for a fast algorithm
2795 *
2796 IR = 1
2797 IF( LWORK.GE.WRKBL+LDA*M ) THEN
2798 *
2799 * WORK(IR) is LDA by M
2800 *
2801 LDWRKR = LDA
2802 ELSE
2803 *
2804 * WORK(IR) is M by M
2805 *
2806 LDWRKR = M
2807 END IF
2808 ITAU = IR + LDWRKR*M
2809 IWORK = ITAU + M
2810 *
2811 * Compute A=L*Q, copying result to VT
2812 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2813 *
2814 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2815 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2816 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2817 *
2818 * Copy L to WORK(IR), zeroing out above it
2819 *
2820 CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2821 $ LDWRKR )
2822 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2823 $ WORK( IR+LDWRKR ), LDWRKR )
2824 *
2825 * Generate Q in VT
2826 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2827 *
2828 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2829 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2830 IE = ITAU
2831 ITAUQ = IE + M
2832 ITAUP = ITAUQ + M
2833 IWORK = ITAUP + M
2834 *
2835 * Bidiagonalize L in WORK(IR)
2836 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2837 *
2838 CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2839 $ WORK( IE ), WORK( ITAUQ ),
2840 $ WORK( ITAUP ), WORK( IWORK ),
2841 $ LWORK-IWORK+1, IERR )
2842 *
2843 * Generate right bidiagonalizing vectors in WORK(IR)
2844 * (Workspace: need M*M+4*M-1,
2845 * prefer M*M+3*M+(M-1)*NB)
2846 *
2847 CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2848 $ WORK( ITAUP ), WORK( IWORK ),
2849 $ LWORK-IWORK+1, IERR )
2850 IWORK = IE + M
2851 *
2852 * Perform bidiagonal QR iteration, computing right
2853 * singular vectors of L in WORK(IR)
2854 * (Workspace: need M*M+BDSPAC)
2855 *
2856 CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2857 $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2858 $ WORK( IWORK ), INFO )
2859 *
2860 * Multiply right singular vectors of L in WORK(IR) by
2861 * Q in VT, storing result in A
2862 * (Workspace: need M*M)
2863 *
2864 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2865 $ LDWRKR, VT, LDVT, ZERO, A, LDA )
2866 *
2867 * Copy right singular vectors of A from A to VT
2868 *
2869 CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
2870 *
2871 ELSE
2872 *
2873 * Insufficient workspace for a fast algorithm
2874 *
2875 ITAU = 1
2876 IWORK = ITAU + M
2877 *
2878 * Compute A=L*Q, copying result to VT
2879 * (Workspace: need 2*M, prefer M+M*NB)
2880 *
2881 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2882 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2883 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2884 *
2885 * Generate Q in VT
2886 * (Workspace: need M+N, prefer M+N*NB)
2887 *
2888 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2889 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2890 IE = ITAU
2891 ITAUQ = IE + M
2892 ITAUP = ITAUQ + M
2893 IWORK = ITAUP + M
2894 *
2895 * Zero out above L in A
2896 *
2897 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2898 $ LDA )
2899 *
2900 * Bidiagonalize L in A
2901 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2902 *
2903 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2904 $ WORK( ITAUQ ), WORK( ITAUP ),
2905 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2906 *
2907 * Multiply right bidiagonalizing vectors in A by Q
2908 * in VT
2909 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2910 *
2911 CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2912 $ WORK( ITAUP ), VT, LDVT,
2913 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2914 IWORK = IE + M
2915 *
2916 * Perform bidiagonal QR iteration, computing right
2917 * singular vectors of A in VT
2918 * (Workspace: need BDSPAC)
2919 *
2920 CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2921 $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2922 $ INFO )
2923 *
2924 END IF
2925 *
2926 ELSE IF( WNTUO ) THEN
2927 *
2928 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
2929 * N right singular vectors to be computed in VT and
2930 * M left singular vectors to be overwritten on A
2931 *
2932 IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2933 *
2934 * Sufficient workspace for a fast algorithm
2935 *
2936 IU = 1
2937 IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2938 *
2939 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2940 *
2941 LDWRKU = LDA
2942 IR = IU + LDWRKU*M
2943 LDWRKR = LDA
2944 ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2945 *
2946 * WORK(IU) is LDA by M and WORK(IR) is M by M
2947 *
2948 LDWRKU = LDA
2949 IR = IU + LDWRKU*M
2950 LDWRKR = M
2951 ELSE
2952 *
2953 * WORK(IU) is M by M and WORK(IR) is M by M
2954 *
2955 LDWRKU = M
2956 IR = IU + LDWRKU*M
2957 LDWRKR = M
2958 END IF
2959 ITAU = IR + LDWRKR*M
2960 IWORK = ITAU + M
2961 *
2962 * Compute A=L*Q, copying result to VT
2963 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2964 *
2965 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2966 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2967 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2968 *
2969 * Generate Q in VT
2970 * (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
2971 *
2972 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2973 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
2974 *
2975 * Copy L to WORK(IU), zeroing out above it
2976 *
2977 CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2978 $ LDWRKU )
2979 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2980 $ WORK( IU+LDWRKU ), LDWRKU )
2981 IE = ITAU
2982 ITAUQ = IE + M
2983 ITAUP = ITAUQ + M
2984 IWORK = ITAUP + M
2985 *
2986 * Bidiagonalize L in WORK(IU), copying result to
2987 * WORK(IR)
2988 * (Workspace: need 2*M*M+4*M,
2989 * prefer 2*M*M+3*M+2*M*NB)
2990 *
2991 CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2992 $ WORK( IE ), WORK( ITAUQ ),
2993 $ WORK( ITAUP ), WORK( IWORK ),
2994 $ LWORK-IWORK+1, IERR )
2995 CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2996 $ WORK( IR ), LDWRKR )
2997 *
2998 * Generate right bidiagonalizing vectors in WORK(IU)
2999 * (Workspace: need 2*M*M+4*M-1,
3000 * prefer 2*M*M+3*M+(M-1)*NB)
3001 *
3002 CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3003 $ WORK( ITAUP ), WORK( IWORK ),
3004 $ LWORK-IWORK+1, IERR )
3005 *
3006 * Generate left bidiagonalizing vectors in WORK(IR)
3007 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3008 *
3009 CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3010 $ WORK( ITAUQ ), WORK( IWORK ),
3011 $ LWORK-IWORK+1, IERR )
3012 IWORK = IE + M
3013 *
3014 * Perform bidiagonal QR iteration, computing left
3015 * singular vectors of L in WORK(IR) and computing
3016 * right singular vectors of L in WORK(IU)
3017 * (Workspace: need 2*M*M+BDSPAC)
3018 *
3019 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3020 $ WORK( IU ), LDWRKU, WORK( IR ),
3021 $ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3022 *
3023 * Multiply right singular vectors of L in WORK(IU) by
3024 * Q in VT, storing result in A
3025 * (Workspace: need M*M)
3026 *
3027 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3028 $ LDWRKU, VT, LDVT, ZERO, A, LDA )
3029 *
3030 * Copy right singular vectors of A from A to VT
3031 *
3032 CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3033 *
3034 * Copy left singular vectors of A from WORK(IR) to A
3035 *
3036 CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3037 $ LDA )
3038 *
3039 ELSE
3040 *
3041 * Insufficient workspace for a fast algorithm
3042 *
3043 ITAU = 1
3044 IWORK = ITAU + M
3045 *
3046 * Compute A=L*Q, copying result to VT
3047 * (Workspace: need 2*M, prefer M+M*NB)
3048 *
3049 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3050 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3051 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3052 *
3053 * Generate Q in VT
3054 * (Workspace: need M+N, prefer M+N*NB)
3055 *
3056 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3057 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3058 IE = ITAU
3059 ITAUQ = IE + M
3060 ITAUP = ITAUQ + M
3061 IWORK = ITAUP + M
3062 *
3063 * Zero out above L in A
3064 *
3065 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
3066 $ LDA )
3067 *
3068 * Bidiagonalize L in A
3069 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3070 *
3071 CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
3072 $ WORK( ITAUQ ), WORK( ITAUP ),
3073 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3074 *
3075 * Multiply right bidiagonalizing vectors in A by Q
3076 * in VT
3077 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3078 *
3079 CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3080 $ WORK( ITAUP ), VT, LDVT,
3081 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3082 *
3083 * Generate left bidiagonalizing vectors in A
3084 * (Workspace: need 4*M, prefer 3*M+M*NB)
3085 *
3086 CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3087 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3088 IWORK = IE + M
3089 *
3090 * Perform bidiagonal QR iteration, computing left
3091 * singular vectors of A in A and computing right
3092 * singular vectors of A in VT
3093 * (Workspace: need BDSPAC)
3094 *
3095 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3096 $ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3097 $ INFO )
3098 *
3099 END IF
3100 *
3101 ELSE IF( WNTUAS ) THEN
3102 *
3103 * Path 9t(N much larger than M, JOBU='S' or 'A',
3104 * JOBVT='A')
3105 * N right singular vectors to be computed in VT and
3106 * M left singular vectors to be computed in U
3107 *
3108 IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3109 *
3110 * Sufficient workspace for a fast algorithm
3111 *
3112 IU = 1
3113 IF( LWORK.GE.WRKBL+LDA*M ) THEN
3114 *
3115 * WORK(IU) is LDA by M
3116 *
3117 LDWRKU = LDA
3118 ELSE
3119 *
3120 * WORK(IU) is M by M
3121 *
3122 LDWRKU = M
3123 END IF
3124 ITAU = IU + LDWRKU*M
3125 IWORK = ITAU + M
3126 *
3127 * Compute A=L*Q, copying result to VT
3128 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3129 *
3130 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3131 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3132 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3133 *
3134 * Generate Q in VT
3135 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3136 *
3137 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3138 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3139 *
3140 * Copy L to WORK(IU), zeroing out above it
3141 *
3142 CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3143 $ LDWRKU )
3144 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3145 $ WORK( IU+LDWRKU ), LDWRKU )
3146 IE = ITAU
3147 ITAUQ = IE + M
3148 ITAUP = ITAUQ + M
3149 IWORK = ITAUP + M
3150 *
3151 * Bidiagonalize L in WORK(IU), copying result to U
3152 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3153 *
3154 CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3155 $ WORK( IE ), WORK( ITAUQ ),
3156 $ WORK( ITAUP ), WORK( IWORK ),
3157 $ LWORK-IWORK+1, IERR )
3158 CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3159 $ LDU )
3160 *
3161 * Generate right bidiagonalizing vectors in WORK(IU)
3162 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3163 *
3164 CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3165 $ WORK( ITAUP ), WORK( IWORK ),
3166 $ LWORK-IWORK+1, IERR )
3167 *
3168 * Generate left bidiagonalizing vectors in U
3169 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3170 *
3171 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3172 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3173 IWORK = IE + M
3174 *
3175 * Perform bidiagonal QR iteration, computing left
3176 * singular vectors of L in U and computing right
3177 * singular vectors of L in WORK(IU)
3178 * (Workspace: need M*M+BDSPAC)
3179 *
3180 CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3181 $ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3182 $ WORK( IWORK ), INFO )
3183 *
3184 * Multiply right singular vectors of L in WORK(IU) by
3185 * Q in VT, storing result in A
3186 * (Workspace: need M*M)
3187 *
3188 CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3189 $ LDWRKU, VT, LDVT, ZERO, A, LDA )
3190 *
3191 * Copy right singular vectors of A from A to VT
3192 *
3193 CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3194 *
3195 ELSE
3196 *
3197 * Insufficient workspace for a fast algorithm
3198 *
3199 ITAU = 1
3200 IWORK = ITAU + M
3201 *
3202 * Compute A=L*Q, copying result to VT
3203 * (Workspace: need 2*M, prefer M+M*NB)
3204 *
3205 CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3206 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3207 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3208 *
3209 * Generate Q in VT
3210 * (Workspace: need M+N, prefer M+N*NB)
3211 *
3212 CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3213 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3214 *
3215 * Copy L to U, zeroing out above it
3216 *
3217 CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3218 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
3219 $ LDU )
3220 IE = ITAU
3221 ITAUQ = IE + M
3222 ITAUP = ITAUQ + M
3223 IWORK = ITAUP + M
3224 *
3225 * Bidiagonalize L in U
3226 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3227 *
3228 CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
3229 $ WORK( ITAUQ ), WORK( ITAUP ),
3230 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3231 *
3232 * Multiply right bidiagonalizing vectors in U by Q
3233 * in VT
3234 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3235 *
3236 CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
3237 $ WORK( ITAUP ), VT, LDVT,
3238 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3239 *
3240 * Generate left bidiagonalizing vectors in U
3241 * (Workspace: need 4*M, prefer 3*M+M*NB)
3242 *
3243 CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3244 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3245 IWORK = IE + M
3246 *
3247 * Perform bidiagonal QR iteration, computing left
3248 * singular vectors of A in U and computing right
3249 * singular vectors of A in VT
3250 * (Workspace: need BDSPAC)
3251 *
3252 CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3253 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3254 $ INFO )
3255 *
3256 END IF
3257 *
3258 END IF
3259 *
3260 END IF
3261 *
3262 ELSE
3263 *
3264 * N .LT. MNTHR
3265 *
3266 * Path 10t(N greater than M, but not much larger)
3267 * Reduce to bidiagonal form without LQ decomposition
3268 *
3269 IE = 1
3270 ITAUQ = IE + M
3271 ITAUP = ITAUQ + M
3272 IWORK = ITAUP + M
3273 *
3274 * Bidiagonalize A
3275 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3276 *
3277 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3278 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3279 $ IERR )
3280 IF( WNTUAS ) THEN
3281 *
3282 * If left singular vectors desired in U, copy result to U
3283 * and generate left bidiagonalizing vectors in U
3284 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3285 *
3286 CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3287 CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3288 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3289 END IF
3290 IF( WNTVAS ) THEN
3291 *
3292 * If right singular vectors desired in VT, copy result to
3293 * VT and generate right bidiagonalizing vectors in VT
3294 * (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3295 *
3296 CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3297 IF( WNTVA )
3298 $ NRVT = N
3299 IF( WNTVS )
3300 $ NRVT = M
3301 CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3302 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3303 END IF
3304 IF( WNTUO ) THEN
3305 *
3306 * If left singular vectors desired in A, generate left
3307 * bidiagonalizing vectors in A
3308 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3309 *
3310 CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3311 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3312 END IF
3313 IF( WNTVO ) THEN
3314 *
3315 * If right singular vectors desired in A, generate right
3316 * bidiagonalizing vectors in A
3317 * (Workspace: need 4*M, prefer 3*M+M*NB)
3318 *
3319 CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3320 $ WORK( IWORK ), LWORK-IWORK+1, IERR )
3321 END IF
3322 IWORK = IE + M
3323 IF( WNTUAS .OR. WNTUO )
3324 $ NRU = M
3325 IF( WNTUN )
3326 $ NRU = 0
3327 IF( WNTVAS .OR. WNTVO )
3328 $ NCVT = N
3329 IF( WNTVN )
3330 $ NCVT = 0
3331 IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3332 *
3333 * Perform bidiagonal QR iteration, if desired, computing
3334 * left singular vectors in U and computing right singular
3335 * vectors in VT
3336 * (Workspace: need BDSPAC)
3337 *
3338 CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3339 $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3340 ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3341 *
3342 * Perform bidiagonal QR iteration, if desired, computing
3343 * left singular vectors in U and computing right singular
3344 * vectors in A
3345 * (Workspace: need BDSPAC)
3346 *
3347 CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3348 $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
3349 ELSE
3350 *
3351 * Perform bidiagonal QR iteration, if desired, computing
3352 * left singular vectors in A and computing right singular
3353 * vectors in VT
3354 * (Workspace: need BDSPAC)
3355 *
3356 CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3357 $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3358 END IF
3359 *
3360 END IF
3361 *
3362 END IF
3363 *
3364 * If DBDSQR failed to converge, copy unconverged superdiagonals
3365 * to WORK( 2:MINMN )
3366 *
3367 IF( INFO.NE.0 ) THEN
3368 IF( IE.GT.2 ) THEN
3369 DO 50 I = 1, MINMN - 1
3370 WORK( I+1 ) = WORK( I+IE-1 )
3371 50 CONTINUE
3372 END IF
3373 IF( IE.LT.2 ) THEN
3374 DO 60 I = MINMN - 1, 1, -1
3375 WORK( I+1 ) = WORK( I+IE-1 )
3376 60 CONTINUE
3377 END IF
3378 END IF
3379 *
3380 * Undo scaling if necessary
3381 *
3382 IF( ISCL.EQ.1 ) THEN
3383 IF( ANRM.GT.BIGNUM )
3384 $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3385 $ IERR )
3386 IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3387 $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3388 $ MINMN, IERR )
3389 IF( ANRM.LT.SMLNUM )
3390 $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3391 $ IERR )
3392 IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3393 $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3394 $ MINMN, IERR )
3395 END IF
3396 *
3397 * Return optimal workspace in WORK(1)
3398 *
3399 WORK( 1 ) = MAXWRK
3400 *
3401 RETURN
3402 *
3403 * End of DGESVD
3404 *
3405 END