1 SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
2 $ WORK, LWORK, INFO )
3 *
4 * -- LAPACK driver routine (version 3.2.2) --
5 * -- LAPACK is a software package provided by Univ. of Tennessee, --
6 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
7 * June 2010
8 *
9 * .. Scalar Arguments ..
10 INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
11 DOUBLE PRECISION RCOND
12 * ..
13 * .. Array Arguments ..
14 DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * DGELSS computes the minimum norm solution to a real linear least
21 * squares problem:
22 *
23 * Minimize 2-norm(| b - A*x |).
24 *
25 * using the singular value decomposition (SVD) of A. A is an M-by-N
26 * matrix which may be rank-deficient.
27 *
28 * Several right hand side vectors b and solution vectors x can be
29 * handled in a single call; they are stored as the columns of the
30 * M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
31 * X.
32 *
33 * The effective rank of A is determined by treating as zero those
34 * singular values which are less than RCOND times the largest singular
35 * value.
36 *
37 * Arguments
38 * =========
39 *
40 * M (input) INTEGER
41 * The number of rows of the matrix A. M >= 0.
42 *
43 * N (input) INTEGER
44 * The number of columns of the matrix A. N >= 0.
45 *
46 * NRHS (input) INTEGER
47 * The number of right hand sides, i.e., the number of columns
48 * of the matrices B and X. NRHS >= 0.
49 *
50 * A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
51 * On entry, the M-by-N matrix A.
52 * On exit, the first min(m,n) rows of A are overwritten with
53 * its right singular vectors, stored rowwise.
54 *
55 * LDA (input) INTEGER
56 * The leading dimension of the array A. LDA >= max(1,M).
57 *
58 * B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
59 * On entry, the M-by-NRHS right hand side matrix B.
60 * On exit, B is overwritten by the N-by-NRHS solution
61 * matrix X. If m >= n and RANK = n, the residual
62 * sum-of-squares for the solution in the i-th column is given
63 * by the sum of squares of elements n+1:m in that column.
64 *
65 * LDB (input) INTEGER
66 * The leading dimension of the array B. LDB >= max(1,max(M,N)).
67 *
68 * S (output) DOUBLE PRECISION array, dimension (min(M,N))
69 * The singular values of A in decreasing order.
70 * The condition number of A in the 2-norm = S(1)/S(min(m,n)).
71 *
72 * RCOND (input) DOUBLE PRECISION
73 * RCOND is used to determine the effective rank of A.
74 * Singular values S(i) <= RCOND*S(1) are treated as zero.
75 * If RCOND < 0, machine precision is used instead.
76 *
77 * RANK (output) INTEGER
78 * The effective rank of A, i.e., the number of singular values
79 * which are greater than RCOND*S(1).
80 *
81 * WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
82 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
83 *
84 * LWORK (input) INTEGER
85 * The dimension of the array WORK. LWORK >= 1, and also:
86 * LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
87 * For good performance, LWORK should generally be larger.
88 *
89 * If LWORK = -1, then a workspace query is assumed; the routine
90 * only calculates the optimal size of the WORK array, returns
91 * this value as the first entry of the WORK array, and no error
92 * message related to LWORK is issued by XERBLA.
93 *
94 * INFO (output) INTEGER
95 * = 0: successful exit
96 * < 0: if INFO = -i, the i-th argument had an illegal value.
97 * > 0: the algorithm for computing the SVD failed to converge;
98 * if INFO = i, i off-diagonal elements of an intermediate
99 * bidiagonal form did not converge to zero.
100 *
101 * =====================================================================
102 *
103 * .. Parameters ..
104 DOUBLE PRECISION ZERO, ONE
105 PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
106 * ..
107 * .. Local Scalars ..
108 LOGICAL LQUERY
109 INTEGER BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL,
110 $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN,
111 $ MAXWRK, MINMN, MINWRK, MM, MNTHR
112 DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR
113 * ..
114 * .. Local Arrays ..
115 DOUBLE PRECISION VDUM( 1 )
116 * ..
117 * .. External Subroutines ..
118 EXTERNAL DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV,
119 $ DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR,
120 $ DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA
121 * ..
122 * .. External Functions ..
123 INTEGER ILAENV
124 DOUBLE PRECISION DLAMCH, DLANGE
125 EXTERNAL ILAENV, DLAMCH, DLANGE
126 * ..
127 * .. Intrinsic Functions ..
128 INTRINSIC MAX, MIN
129 * ..
130 * .. Executable Statements ..
131 *
132 * Test the input arguments
133 *
134 INFO = 0
135 MINMN = MIN( M, N )
136 MAXMN = MAX( M, N )
137 LQUERY = ( LWORK.EQ.-1 )
138 IF( M.LT.0 ) THEN
139 INFO = -1
140 ELSE IF( N.LT.0 ) THEN
141 INFO = -2
142 ELSE IF( NRHS.LT.0 ) THEN
143 INFO = -3
144 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
145 INFO = -5
146 ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
147 INFO = -7
148 END IF
149 *
150 * Compute workspace
151 * (Note: Comments in the code beginning "Workspace:" describe the
152 * minimal amount of workspace needed at that point in the code,
153 * as well as the preferred amount for good performance.
154 * NB refers to the optimal block size for the immediately
155 * following subroutine, as returned by ILAENV.)
156 *
157 IF( INFO.EQ.0 ) THEN
158 MINWRK = 1
159 MAXWRK = 1
160 IF( MINMN.GT.0 ) THEN
161 MM = M
162 MNTHR = ILAENV( 6, 'DGELSS', ' ', M, N, NRHS, -1 )
163 IF( M.GE.N .AND. M.GE.MNTHR ) THEN
164 *
165 * Path 1a - overdetermined, with many more rows than
166 * columns
167 *
168 MM = N
169 MAXWRK = MAX( MAXWRK, N + N*ILAENV( 1, 'DGEQRF', ' ', M,
170 $ N, -1, -1 ) )
171 MAXWRK = MAX( MAXWRK, N + NRHS*ILAENV( 1, 'DORMQR', 'LT',
172 $ M, NRHS, N, -1 ) )
173 END IF
174 IF( M.GE.N ) THEN
175 *
176 * Path 1 - overdetermined or exactly determined
177 *
178 * Compute workspace needed for DBDSQR
179 *
180 BDSPAC = MAX( 1, 5*N )
181 MAXWRK = MAX( MAXWRK, 3*N + ( MM + N )*ILAENV( 1,
182 $ 'DGEBRD', ' ', MM, N, -1, -1 ) )
183 MAXWRK = MAX( MAXWRK, 3*N + NRHS*ILAENV( 1, 'DORMBR',
184 $ 'QLT', MM, NRHS, N, -1 ) )
185 MAXWRK = MAX( MAXWRK, 3*N + ( N - 1 )*ILAENV( 1,
186 $ 'DORGBR', 'P', N, N, N, -1 ) )
187 MAXWRK = MAX( MAXWRK, BDSPAC )
188 MAXWRK = MAX( MAXWRK, N*NRHS )
189 MINWRK = MAX( 3*N + MM, 3*N + NRHS, BDSPAC )
190 MAXWRK = MAX( MINWRK, MAXWRK )
191 END IF
192 IF( N.GT.M ) THEN
193 *
194 * Compute workspace needed for DBDSQR
195 *
196 BDSPAC = MAX( 1, 5*M )
197 MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC )
198 IF( N.GE.MNTHR ) THEN
199 *
200 * Path 2a - underdetermined, with many more columns
201 * than rows
202 *
203 MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1,
204 $ -1 )
205 MAXWRK = MAX( MAXWRK, M*M + 4*M + 2*M*ILAENV( 1,
206 $ 'DGEBRD', ' ', M, M, -1, -1 ) )
207 MAXWRK = MAX( MAXWRK, M*M + 4*M + NRHS*ILAENV( 1,
208 $ 'DORMBR', 'QLT', M, NRHS, M, -1 ) )
209 MAXWRK = MAX( MAXWRK, M*M + 4*M +
210 $ ( M - 1 )*ILAENV( 1, 'DORGBR', 'P', M,
211 $ M, M, -1 ) )
212 MAXWRK = MAX( MAXWRK, M*M + M + BDSPAC )
213 IF( NRHS.GT.1 ) THEN
214 MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS )
215 ELSE
216 MAXWRK = MAX( MAXWRK, M*M + 2*M )
217 END IF
218 MAXWRK = MAX( MAXWRK, M + NRHS*ILAENV( 1, 'DORMLQ',
219 $ 'LT', N, NRHS, M, -1 ) )
220 ELSE
221 *
222 * Path 2 - underdetermined
223 *
224 MAXWRK = 3*M + ( N + M )*ILAENV( 1, 'DGEBRD', ' ', M,
225 $ N, -1, -1 )
226 MAXWRK = MAX( MAXWRK, 3*M + NRHS*ILAENV( 1, 'DORMBR',
227 $ 'QLT', M, NRHS, M, -1 ) )
228 MAXWRK = MAX( MAXWRK, 3*M + M*ILAENV( 1, 'DORGBR',
229 $ 'P', M, N, M, -1 ) )
230 MAXWRK = MAX( MAXWRK, BDSPAC )
231 MAXWRK = MAX( MAXWRK, N*NRHS )
232 END IF
233 END IF
234 MAXWRK = MAX( MINWRK, MAXWRK )
235 END IF
236 WORK( 1 ) = MAXWRK
237 *
238 IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
239 $ INFO = -12
240 END IF
241 *
242 IF( INFO.NE.0 ) THEN
243 CALL XERBLA( 'DGELSS', -INFO )
244 RETURN
245 ELSE IF( LQUERY ) THEN
246 RETURN
247 END IF
248 *
249 * Quick return if possible
250 *
251 IF( M.EQ.0 .OR. N.EQ.0 ) THEN
252 RANK = 0
253 RETURN
254 END IF
255 *
256 * Get machine parameters
257 *
258 EPS = DLAMCH( 'P' )
259 SFMIN = DLAMCH( 'S' )
260 SMLNUM = SFMIN / EPS
261 BIGNUM = ONE / SMLNUM
262 CALL DLABAD( SMLNUM, BIGNUM )
263 *
264 * Scale A if max element outside range [SMLNUM,BIGNUM]
265 *
266 ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
267 IASCL = 0
268 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
269 *
270 * Scale matrix norm up to SMLNUM
271 *
272 CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
273 IASCL = 1
274 ELSE IF( ANRM.GT.BIGNUM ) THEN
275 *
276 * Scale matrix norm down to BIGNUM
277 *
278 CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
279 IASCL = 2
280 ELSE IF( ANRM.EQ.ZERO ) THEN
281 *
282 * Matrix all zero. Return zero solution.
283 *
284 CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
285 CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN )
286 RANK = 0
287 GO TO 70
288 END IF
289 *
290 * Scale B if max element outside range [SMLNUM,BIGNUM]
291 *
292 BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
293 IBSCL = 0
294 IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
295 *
296 * Scale matrix norm up to SMLNUM
297 *
298 CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
299 IBSCL = 1
300 ELSE IF( BNRM.GT.BIGNUM ) THEN
301 *
302 * Scale matrix norm down to BIGNUM
303 *
304 CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
305 IBSCL = 2
306 END IF
307 *
308 * Overdetermined case
309 *
310 IF( M.GE.N ) THEN
311 *
312 * Path 1 - overdetermined or exactly determined
313 *
314 MM = M
315 IF( M.GE.MNTHR ) THEN
316 *
317 * Path 1a - overdetermined, with many more rows than columns
318 *
319 MM = N
320 ITAU = 1
321 IWORK = ITAU + N
322 *
323 * Compute A=Q*R
324 * (Workspace: need 2*N, prefer N+N*NB)
325 *
326 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
327 $ LWORK-IWORK+1, INFO )
328 *
329 * Multiply B by transpose(Q)
330 * (Workspace: need N+NRHS, prefer N+NRHS*NB)
331 *
332 CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
333 $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
334 *
335 * Zero out below R
336 *
337 IF( N.GT.1 )
338 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
339 END IF
340 *
341 IE = 1
342 ITAUQ = IE + N
343 ITAUP = ITAUQ + N
344 IWORK = ITAUP + N
345 *
346 * Bidiagonalize R in A
347 * (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
348 *
349 CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
350 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
351 $ INFO )
352 *
353 * Multiply B by transpose of left bidiagonalizing vectors of R
354 * (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
355 *
356 CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
357 $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
358 *
359 * Generate right bidiagonalizing vectors of R in A
360 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
361 *
362 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
363 $ WORK( IWORK ), LWORK-IWORK+1, INFO )
364 IWORK = IE + N
365 *
366 * Perform bidiagonal QR iteration
367 * multiply B by transpose of left singular vectors
368 * compute right singular vectors in A
369 * (Workspace: need BDSPAC)
370 *
371 CALL DBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM,
372 $ 1, B, LDB, WORK( IWORK ), INFO )
373 IF( INFO.NE.0 )
374 $ GO TO 70
375 *
376 * Multiply B by reciprocals of singular values
377 *
378 THR = MAX( RCOND*S( 1 ), SFMIN )
379 IF( RCOND.LT.ZERO )
380 $ THR = MAX( EPS*S( 1 ), SFMIN )
381 RANK = 0
382 DO 10 I = 1, N
383 IF( S( I ).GT.THR ) THEN
384 CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
385 RANK = RANK + 1
386 ELSE
387 CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
388 END IF
389 10 CONTINUE
390 *
391 * Multiply B by right singular vectors
392 * (Workspace: need N, prefer N*NRHS)
393 *
394 IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
395 CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO,
396 $ WORK, LDB )
397 CALL DLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
398 ELSE IF( NRHS.GT.1 ) THEN
399 CHUNK = LWORK / N
400 DO 20 I = 1, NRHS, CHUNK
401 BL = MIN( NRHS-I+1, CHUNK )
402 CALL DGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ),
403 $ LDB, ZERO, WORK, N )
404 CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
405 20 CONTINUE
406 ELSE
407 CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
408 CALL DCOPY( N, WORK, 1, B, 1 )
409 END IF
410 *
411 ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
412 $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
413 *
414 * Path 2a - underdetermined, with many more columns than rows
415 * and sufficient workspace for an efficient algorithm
416 *
417 LDWORK = M
418 IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
419 $ M*LDA+M+M*NRHS ) )LDWORK = LDA
420 ITAU = 1
421 IWORK = M + 1
422 *
423 * Compute A=L*Q
424 * (Workspace: need 2*M, prefer M+M*NB)
425 *
426 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
427 $ LWORK-IWORK+1, INFO )
428 IL = IWORK
429 *
430 * Copy L to WORK(IL), zeroing out above it
431 *
432 CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
433 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
434 $ LDWORK )
435 IE = IL + LDWORK*M
436 ITAUQ = IE + M
437 ITAUP = ITAUQ + M
438 IWORK = ITAUP + M
439 *
440 * Bidiagonalize L in WORK(IL)
441 * (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
442 *
443 CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
444 $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ),
445 $ LWORK-IWORK+1, INFO )
446 *
447 * Multiply B by transpose of left bidiagonalizing vectors of L
448 * (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
449 *
450 CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
451 $ WORK( ITAUQ ), B, LDB, WORK( IWORK ),
452 $ LWORK-IWORK+1, INFO )
453 *
454 * Generate right bidiagonalizing vectors of R in WORK(IL)
455 * (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
456 *
457 CALL DORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ),
458 $ WORK( IWORK ), LWORK-IWORK+1, INFO )
459 IWORK = IE + M
460 *
461 * Perform bidiagonal QR iteration,
462 * computing right singular vectors of L in WORK(IL) and
463 * multiplying B by transpose of left singular vectors
464 * (Workspace: need M*M+M+BDSPAC)
465 *
466 CALL DBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ),
467 $ LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO )
468 IF( INFO.NE.0 )
469 $ GO TO 70
470 *
471 * Multiply B by reciprocals of singular values
472 *
473 THR = MAX( RCOND*S( 1 ), SFMIN )
474 IF( RCOND.LT.ZERO )
475 $ THR = MAX( EPS*S( 1 ), SFMIN )
476 RANK = 0
477 DO 30 I = 1, M
478 IF( S( I ).GT.THR ) THEN
479 CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
480 RANK = RANK + 1
481 ELSE
482 CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
483 END IF
484 30 CONTINUE
485 IWORK = IE
486 *
487 * Multiply B by right singular vectors of L in WORK(IL)
488 * (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
489 *
490 IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN
491 CALL DGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK,
492 $ B, LDB, ZERO, WORK( IWORK ), LDB )
493 CALL DLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB )
494 ELSE IF( NRHS.GT.1 ) THEN
495 CHUNK = ( LWORK-IWORK+1 ) / M
496 DO 40 I = 1, NRHS, CHUNK
497 BL = MIN( NRHS-I+1, CHUNK )
498 CALL DGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK,
499 $ B( 1, I ), LDB, ZERO, WORK( IWORK ), M )
500 CALL DLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ),
501 $ LDB )
502 40 CONTINUE
503 ELSE
504 CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ),
505 $ 1, ZERO, WORK( IWORK ), 1 )
506 CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
507 END IF
508 *
509 * Zero out below first M rows of B
510 *
511 CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
512 IWORK = ITAU + M
513 *
514 * Multiply transpose(Q) by B
515 * (Workspace: need M+NRHS, prefer M+NRHS*NB)
516 *
517 CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
518 $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
519 *
520 ELSE
521 *
522 * Path 2 - remaining underdetermined cases
523 *
524 IE = 1
525 ITAUQ = IE + M
526 ITAUP = ITAUQ + M
527 IWORK = ITAUP + M
528 *
529 * Bidiagonalize A
530 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
531 *
532 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
533 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
534 $ INFO )
535 *
536 * Multiply B by transpose of left bidiagonalizing vectors
537 * (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
538 *
539 CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
540 $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
541 *
542 * Generate right bidiagonalizing vectors in A
543 * (Workspace: need 4*M, prefer 3*M+M*NB)
544 *
545 CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
546 $ WORK( IWORK ), LWORK-IWORK+1, INFO )
547 IWORK = IE + M
548 *
549 * Perform bidiagonal QR iteration,
550 * computing right singular vectors of A in A and
551 * multiplying B by transpose of left singular vectors
552 * (Workspace: need BDSPAC)
553 *
554 CALL DBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM,
555 $ 1, B, LDB, WORK( IWORK ), INFO )
556 IF( INFO.NE.0 )
557 $ GO TO 70
558 *
559 * Multiply B by reciprocals of singular values
560 *
561 THR = MAX( RCOND*S( 1 ), SFMIN )
562 IF( RCOND.LT.ZERO )
563 $ THR = MAX( EPS*S( 1 ), SFMIN )
564 RANK = 0
565 DO 50 I = 1, M
566 IF( S( I ).GT.THR ) THEN
567 CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
568 RANK = RANK + 1
569 ELSE
570 CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
571 END IF
572 50 CONTINUE
573 *
574 * Multiply B by right singular vectors of A
575 * (Workspace: need N, prefer N*NRHS)
576 *
577 IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
578 CALL DGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO,
579 $ WORK, LDB )
580 CALL DLACPY( 'F', N, NRHS, WORK, LDB, B, LDB )
581 ELSE IF( NRHS.GT.1 ) THEN
582 CHUNK = LWORK / N
583 DO 60 I = 1, NRHS, CHUNK
584 BL = MIN( NRHS-I+1, CHUNK )
585 CALL DGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ),
586 $ LDB, ZERO, WORK, N )
587 CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
588 60 CONTINUE
589 ELSE
590 CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
591 CALL DCOPY( N, WORK, 1, B, 1 )
592 END IF
593 END IF
594 *
595 * Undo scaling
596 *
597 IF( IASCL.EQ.1 ) THEN
598 CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
599 CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
600 $ INFO )
601 ELSE IF( IASCL.EQ.2 ) THEN
602 CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
603 CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
604 $ INFO )
605 END IF
606 IF( IBSCL.EQ.1 ) THEN
607 CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
608 ELSE IF( IBSCL.EQ.2 ) THEN
609 CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
610 END IF
611 *
612 70 CONTINUE
613 WORK( 1 ) = MAXWRK
614 RETURN
615 *
616 * End of DGELSS
617 *
618 END
2 $ WORK, LWORK, INFO )
3 *
4 * -- LAPACK driver routine (version 3.2.2) --
5 * -- LAPACK is a software package provided by Univ. of Tennessee, --
6 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
7 * June 2010
8 *
9 * .. Scalar Arguments ..
10 INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
11 DOUBLE PRECISION RCOND
12 * ..
13 * .. Array Arguments ..
14 DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * DGELSS computes the minimum norm solution to a real linear least
21 * squares problem:
22 *
23 * Minimize 2-norm(| b - A*x |).
24 *
25 * using the singular value decomposition (SVD) of A. A is an M-by-N
26 * matrix which may be rank-deficient.
27 *
28 * Several right hand side vectors b and solution vectors x can be
29 * handled in a single call; they are stored as the columns of the
30 * M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
31 * X.
32 *
33 * The effective rank of A is determined by treating as zero those
34 * singular values which are less than RCOND times the largest singular
35 * value.
36 *
37 * Arguments
38 * =========
39 *
40 * M (input) INTEGER
41 * The number of rows of the matrix A. M >= 0.
42 *
43 * N (input) INTEGER
44 * The number of columns of the matrix A. N >= 0.
45 *
46 * NRHS (input) INTEGER
47 * The number of right hand sides, i.e., the number of columns
48 * of the matrices B and X. NRHS >= 0.
49 *
50 * A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
51 * On entry, the M-by-N matrix A.
52 * On exit, the first min(m,n) rows of A are overwritten with
53 * its right singular vectors, stored rowwise.
54 *
55 * LDA (input) INTEGER
56 * The leading dimension of the array A. LDA >= max(1,M).
57 *
58 * B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
59 * On entry, the M-by-NRHS right hand side matrix B.
60 * On exit, B is overwritten by the N-by-NRHS solution
61 * matrix X. If m >= n and RANK = n, the residual
62 * sum-of-squares for the solution in the i-th column is given
63 * by the sum of squares of elements n+1:m in that column.
64 *
65 * LDB (input) INTEGER
66 * The leading dimension of the array B. LDB >= max(1,max(M,N)).
67 *
68 * S (output) DOUBLE PRECISION array, dimension (min(M,N))
69 * The singular values of A in decreasing order.
70 * The condition number of A in the 2-norm = S(1)/S(min(m,n)).
71 *
72 * RCOND (input) DOUBLE PRECISION
73 * RCOND is used to determine the effective rank of A.
74 * Singular values S(i) <= RCOND*S(1) are treated as zero.
75 * If RCOND < 0, machine precision is used instead.
76 *
77 * RANK (output) INTEGER
78 * The effective rank of A, i.e., the number of singular values
79 * which are greater than RCOND*S(1).
80 *
81 * WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
82 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
83 *
84 * LWORK (input) INTEGER
85 * The dimension of the array WORK. LWORK >= 1, and also:
86 * LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
87 * For good performance, LWORK should generally be larger.
88 *
89 * If LWORK = -1, then a workspace query is assumed; the routine
90 * only calculates the optimal size of the WORK array, returns
91 * this value as the first entry of the WORK array, and no error
92 * message related to LWORK is issued by XERBLA.
93 *
94 * INFO (output) INTEGER
95 * = 0: successful exit
96 * < 0: if INFO = -i, the i-th argument had an illegal value.
97 * > 0: the algorithm for computing the SVD failed to converge;
98 * if INFO = i, i off-diagonal elements of an intermediate
99 * bidiagonal form did not converge to zero.
100 *
101 * =====================================================================
102 *
103 * .. Parameters ..
104 DOUBLE PRECISION ZERO, ONE
105 PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
106 * ..
107 * .. Local Scalars ..
108 LOGICAL LQUERY
109 INTEGER BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL,
110 $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN,
111 $ MAXWRK, MINMN, MINWRK, MM, MNTHR
112 DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR
113 * ..
114 * .. Local Arrays ..
115 DOUBLE PRECISION VDUM( 1 )
116 * ..
117 * .. External Subroutines ..
118 EXTERNAL DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV,
119 $ DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR,
120 $ DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA
121 * ..
122 * .. External Functions ..
123 INTEGER ILAENV
124 DOUBLE PRECISION DLAMCH, DLANGE
125 EXTERNAL ILAENV, DLAMCH, DLANGE
126 * ..
127 * .. Intrinsic Functions ..
128 INTRINSIC MAX, MIN
129 * ..
130 * .. Executable Statements ..
131 *
132 * Test the input arguments
133 *
134 INFO = 0
135 MINMN = MIN( M, N )
136 MAXMN = MAX( M, N )
137 LQUERY = ( LWORK.EQ.-1 )
138 IF( M.LT.0 ) THEN
139 INFO = -1
140 ELSE IF( N.LT.0 ) THEN
141 INFO = -2
142 ELSE IF( NRHS.LT.0 ) THEN
143 INFO = -3
144 ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
145 INFO = -5
146 ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
147 INFO = -7
148 END IF
149 *
150 * Compute workspace
151 * (Note: Comments in the code beginning "Workspace:" describe the
152 * minimal amount of workspace needed at that point in the code,
153 * as well as the preferred amount for good performance.
154 * NB refers to the optimal block size for the immediately
155 * following subroutine, as returned by ILAENV.)
156 *
157 IF( INFO.EQ.0 ) THEN
158 MINWRK = 1
159 MAXWRK = 1
160 IF( MINMN.GT.0 ) THEN
161 MM = M
162 MNTHR = ILAENV( 6, 'DGELSS', ' ', M, N, NRHS, -1 )
163 IF( M.GE.N .AND. M.GE.MNTHR ) THEN
164 *
165 * Path 1a - overdetermined, with many more rows than
166 * columns
167 *
168 MM = N
169 MAXWRK = MAX( MAXWRK, N + N*ILAENV( 1, 'DGEQRF', ' ', M,
170 $ N, -1, -1 ) )
171 MAXWRK = MAX( MAXWRK, N + NRHS*ILAENV( 1, 'DORMQR', 'LT',
172 $ M, NRHS, N, -1 ) )
173 END IF
174 IF( M.GE.N ) THEN
175 *
176 * Path 1 - overdetermined or exactly determined
177 *
178 * Compute workspace needed for DBDSQR
179 *
180 BDSPAC = MAX( 1, 5*N )
181 MAXWRK = MAX( MAXWRK, 3*N + ( MM + N )*ILAENV( 1,
182 $ 'DGEBRD', ' ', MM, N, -1, -1 ) )
183 MAXWRK = MAX( MAXWRK, 3*N + NRHS*ILAENV( 1, 'DORMBR',
184 $ 'QLT', MM, NRHS, N, -1 ) )
185 MAXWRK = MAX( MAXWRK, 3*N + ( N - 1 )*ILAENV( 1,
186 $ 'DORGBR', 'P', N, N, N, -1 ) )
187 MAXWRK = MAX( MAXWRK, BDSPAC )
188 MAXWRK = MAX( MAXWRK, N*NRHS )
189 MINWRK = MAX( 3*N + MM, 3*N + NRHS, BDSPAC )
190 MAXWRK = MAX( MINWRK, MAXWRK )
191 END IF
192 IF( N.GT.M ) THEN
193 *
194 * Compute workspace needed for DBDSQR
195 *
196 BDSPAC = MAX( 1, 5*M )
197 MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC )
198 IF( N.GE.MNTHR ) THEN
199 *
200 * Path 2a - underdetermined, with many more columns
201 * than rows
202 *
203 MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1,
204 $ -1 )
205 MAXWRK = MAX( MAXWRK, M*M + 4*M + 2*M*ILAENV( 1,
206 $ 'DGEBRD', ' ', M, M, -1, -1 ) )
207 MAXWRK = MAX( MAXWRK, M*M + 4*M + NRHS*ILAENV( 1,
208 $ 'DORMBR', 'QLT', M, NRHS, M, -1 ) )
209 MAXWRK = MAX( MAXWRK, M*M + 4*M +
210 $ ( M - 1 )*ILAENV( 1, 'DORGBR', 'P', M,
211 $ M, M, -1 ) )
212 MAXWRK = MAX( MAXWRK, M*M + M + BDSPAC )
213 IF( NRHS.GT.1 ) THEN
214 MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS )
215 ELSE
216 MAXWRK = MAX( MAXWRK, M*M + 2*M )
217 END IF
218 MAXWRK = MAX( MAXWRK, M + NRHS*ILAENV( 1, 'DORMLQ',
219 $ 'LT', N, NRHS, M, -1 ) )
220 ELSE
221 *
222 * Path 2 - underdetermined
223 *
224 MAXWRK = 3*M + ( N + M )*ILAENV( 1, 'DGEBRD', ' ', M,
225 $ N, -1, -1 )
226 MAXWRK = MAX( MAXWRK, 3*M + NRHS*ILAENV( 1, 'DORMBR',
227 $ 'QLT', M, NRHS, M, -1 ) )
228 MAXWRK = MAX( MAXWRK, 3*M + M*ILAENV( 1, 'DORGBR',
229 $ 'P', M, N, M, -1 ) )
230 MAXWRK = MAX( MAXWRK, BDSPAC )
231 MAXWRK = MAX( MAXWRK, N*NRHS )
232 END IF
233 END IF
234 MAXWRK = MAX( MINWRK, MAXWRK )
235 END IF
236 WORK( 1 ) = MAXWRK
237 *
238 IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
239 $ INFO = -12
240 END IF
241 *
242 IF( INFO.NE.0 ) THEN
243 CALL XERBLA( 'DGELSS', -INFO )
244 RETURN
245 ELSE IF( LQUERY ) THEN
246 RETURN
247 END IF
248 *
249 * Quick return if possible
250 *
251 IF( M.EQ.0 .OR. N.EQ.0 ) THEN
252 RANK = 0
253 RETURN
254 END IF
255 *
256 * Get machine parameters
257 *
258 EPS = DLAMCH( 'P' )
259 SFMIN = DLAMCH( 'S' )
260 SMLNUM = SFMIN / EPS
261 BIGNUM = ONE / SMLNUM
262 CALL DLABAD( SMLNUM, BIGNUM )
263 *
264 * Scale A if max element outside range [SMLNUM,BIGNUM]
265 *
266 ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
267 IASCL = 0
268 IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
269 *
270 * Scale matrix norm up to SMLNUM
271 *
272 CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
273 IASCL = 1
274 ELSE IF( ANRM.GT.BIGNUM ) THEN
275 *
276 * Scale matrix norm down to BIGNUM
277 *
278 CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
279 IASCL = 2
280 ELSE IF( ANRM.EQ.ZERO ) THEN
281 *
282 * Matrix all zero. Return zero solution.
283 *
284 CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
285 CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN )
286 RANK = 0
287 GO TO 70
288 END IF
289 *
290 * Scale B if max element outside range [SMLNUM,BIGNUM]
291 *
292 BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
293 IBSCL = 0
294 IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
295 *
296 * Scale matrix norm up to SMLNUM
297 *
298 CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
299 IBSCL = 1
300 ELSE IF( BNRM.GT.BIGNUM ) THEN
301 *
302 * Scale matrix norm down to BIGNUM
303 *
304 CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
305 IBSCL = 2
306 END IF
307 *
308 * Overdetermined case
309 *
310 IF( M.GE.N ) THEN
311 *
312 * Path 1 - overdetermined or exactly determined
313 *
314 MM = M
315 IF( M.GE.MNTHR ) THEN
316 *
317 * Path 1a - overdetermined, with many more rows than columns
318 *
319 MM = N
320 ITAU = 1
321 IWORK = ITAU + N
322 *
323 * Compute A=Q*R
324 * (Workspace: need 2*N, prefer N+N*NB)
325 *
326 CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
327 $ LWORK-IWORK+1, INFO )
328 *
329 * Multiply B by transpose(Q)
330 * (Workspace: need N+NRHS, prefer N+NRHS*NB)
331 *
332 CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
333 $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
334 *
335 * Zero out below R
336 *
337 IF( N.GT.1 )
338 $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
339 END IF
340 *
341 IE = 1
342 ITAUQ = IE + N
343 ITAUP = ITAUQ + N
344 IWORK = ITAUP + N
345 *
346 * Bidiagonalize R in A
347 * (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
348 *
349 CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
350 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
351 $ INFO )
352 *
353 * Multiply B by transpose of left bidiagonalizing vectors of R
354 * (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
355 *
356 CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
357 $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
358 *
359 * Generate right bidiagonalizing vectors of R in A
360 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
361 *
362 CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
363 $ WORK( IWORK ), LWORK-IWORK+1, INFO )
364 IWORK = IE + N
365 *
366 * Perform bidiagonal QR iteration
367 * multiply B by transpose of left singular vectors
368 * compute right singular vectors in A
369 * (Workspace: need BDSPAC)
370 *
371 CALL DBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM,
372 $ 1, B, LDB, WORK( IWORK ), INFO )
373 IF( INFO.NE.0 )
374 $ GO TO 70
375 *
376 * Multiply B by reciprocals of singular values
377 *
378 THR = MAX( RCOND*S( 1 ), SFMIN )
379 IF( RCOND.LT.ZERO )
380 $ THR = MAX( EPS*S( 1 ), SFMIN )
381 RANK = 0
382 DO 10 I = 1, N
383 IF( S( I ).GT.THR ) THEN
384 CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
385 RANK = RANK + 1
386 ELSE
387 CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
388 END IF
389 10 CONTINUE
390 *
391 * Multiply B by right singular vectors
392 * (Workspace: need N, prefer N*NRHS)
393 *
394 IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
395 CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO,
396 $ WORK, LDB )
397 CALL DLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
398 ELSE IF( NRHS.GT.1 ) THEN
399 CHUNK = LWORK / N
400 DO 20 I = 1, NRHS, CHUNK
401 BL = MIN( NRHS-I+1, CHUNK )
402 CALL DGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ),
403 $ LDB, ZERO, WORK, N )
404 CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
405 20 CONTINUE
406 ELSE
407 CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
408 CALL DCOPY( N, WORK, 1, B, 1 )
409 END IF
410 *
411 ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
412 $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
413 *
414 * Path 2a - underdetermined, with many more columns than rows
415 * and sufficient workspace for an efficient algorithm
416 *
417 LDWORK = M
418 IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
419 $ M*LDA+M+M*NRHS ) )LDWORK = LDA
420 ITAU = 1
421 IWORK = M + 1
422 *
423 * Compute A=L*Q
424 * (Workspace: need 2*M, prefer M+M*NB)
425 *
426 CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
427 $ LWORK-IWORK+1, INFO )
428 IL = IWORK
429 *
430 * Copy L to WORK(IL), zeroing out above it
431 *
432 CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
433 CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
434 $ LDWORK )
435 IE = IL + LDWORK*M
436 ITAUQ = IE + M
437 ITAUP = ITAUQ + M
438 IWORK = ITAUP + M
439 *
440 * Bidiagonalize L in WORK(IL)
441 * (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
442 *
443 CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
444 $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ),
445 $ LWORK-IWORK+1, INFO )
446 *
447 * Multiply B by transpose of left bidiagonalizing vectors of L
448 * (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
449 *
450 CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
451 $ WORK( ITAUQ ), B, LDB, WORK( IWORK ),
452 $ LWORK-IWORK+1, INFO )
453 *
454 * Generate right bidiagonalizing vectors of R in WORK(IL)
455 * (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
456 *
457 CALL DORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ),
458 $ WORK( IWORK ), LWORK-IWORK+1, INFO )
459 IWORK = IE + M
460 *
461 * Perform bidiagonal QR iteration,
462 * computing right singular vectors of L in WORK(IL) and
463 * multiplying B by transpose of left singular vectors
464 * (Workspace: need M*M+M+BDSPAC)
465 *
466 CALL DBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ),
467 $ LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO )
468 IF( INFO.NE.0 )
469 $ GO TO 70
470 *
471 * Multiply B by reciprocals of singular values
472 *
473 THR = MAX( RCOND*S( 1 ), SFMIN )
474 IF( RCOND.LT.ZERO )
475 $ THR = MAX( EPS*S( 1 ), SFMIN )
476 RANK = 0
477 DO 30 I = 1, M
478 IF( S( I ).GT.THR ) THEN
479 CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
480 RANK = RANK + 1
481 ELSE
482 CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
483 END IF
484 30 CONTINUE
485 IWORK = IE
486 *
487 * Multiply B by right singular vectors of L in WORK(IL)
488 * (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
489 *
490 IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN
491 CALL DGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK,
492 $ B, LDB, ZERO, WORK( IWORK ), LDB )
493 CALL DLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB )
494 ELSE IF( NRHS.GT.1 ) THEN
495 CHUNK = ( LWORK-IWORK+1 ) / M
496 DO 40 I = 1, NRHS, CHUNK
497 BL = MIN( NRHS-I+1, CHUNK )
498 CALL DGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK,
499 $ B( 1, I ), LDB, ZERO, WORK( IWORK ), M )
500 CALL DLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ),
501 $ LDB )
502 40 CONTINUE
503 ELSE
504 CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ),
505 $ 1, ZERO, WORK( IWORK ), 1 )
506 CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
507 END IF
508 *
509 * Zero out below first M rows of B
510 *
511 CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
512 IWORK = ITAU + M
513 *
514 * Multiply transpose(Q) by B
515 * (Workspace: need M+NRHS, prefer M+NRHS*NB)
516 *
517 CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
518 $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
519 *
520 ELSE
521 *
522 * Path 2 - remaining underdetermined cases
523 *
524 IE = 1
525 ITAUQ = IE + M
526 ITAUP = ITAUQ + M
527 IWORK = ITAUP + M
528 *
529 * Bidiagonalize A
530 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
531 *
532 CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
533 $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
534 $ INFO )
535 *
536 * Multiply B by transpose of left bidiagonalizing vectors
537 * (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
538 *
539 CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
540 $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
541 *
542 * Generate right bidiagonalizing vectors in A
543 * (Workspace: need 4*M, prefer 3*M+M*NB)
544 *
545 CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
546 $ WORK( IWORK ), LWORK-IWORK+1, INFO )
547 IWORK = IE + M
548 *
549 * Perform bidiagonal QR iteration,
550 * computing right singular vectors of A in A and
551 * multiplying B by transpose of left singular vectors
552 * (Workspace: need BDSPAC)
553 *
554 CALL DBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM,
555 $ 1, B, LDB, WORK( IWORK ), INFO )
556 IF( INFO.NE.0 )
557 $ GO TO 70
558 *
559 * Multiply B by reciprocals of singular values
560 *
561 THR = MAX( RCOND*S( 1 ), SFMIN )
562 IF( RCOND.LT.ZERO )
563 $ THR = MAX( EPS*S( 1 ), SFMIN )
564 RANK = 0
565 DO 50 I = 1, M
566 IF( S( I ).GT.THR ) THEN
567 CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
568 RANK = RANK + 1
569 ELSE
570 CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
571 END IF
572 50 CONTINUE
573 *
574 * Multiply B by right singular vectors of A
575 * (Workspace: need N, prefer N*NRHS)
576 *
577 IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
578 CALL DGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO,
579 $ WORK, LDB )
580 CALL DLACPY( 'F', N, NRHS, WORK, LDB, B, LDB )
581 ELSE IF( NRHS.GT.1 ) THEN
582 CHUNK = LWORK / N
583 DO 60 I = 1, NRHS, CHUNK
584 BL = MIN( NRHS-I+1, CHUNK )
585 CALL DGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ),
586 $ LDB, ZERO, WORK, N )
587 CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
588 60 CONTINUE
589 ELSE
590 CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
591 CALL DCOPY( N, WORK, 1, B, 1 )
592 END IF
593 END IF
594 *
595 * Undo scaling
596 *
597 IF( IASCL.EQ.1 ) THEN
598 CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
599 CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
600 $ INFO )
601 ELSE IF( IASCL.EQ.2 ) THEN
602 CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
603 CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
604 $ INFO )
605 END IF
606 IF( IBSCL.EQ.1 ) THEN
607 CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
608 ELSE IF( IBSCL.EQ.2 ) THEN
609 CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
610 END IF
611 *
612 70 CONTINUE
613 WORK( 1 ) = MAXWRK
614 RETURN
615 *
616 * End of DGELSS
617 *
618 END