1 SUBROUTINE ZLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
2 $ LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
3 $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK,
4 $ IWORK, INFO )
5 *
6 * -- LAPACK routine (version 3.2) --
7 * -- LAPACK is a software package provided by Univ. of Tennessee, --
8 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
9 * November 2006
10 *
11 * .. Scalar Arguments ..
12 INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
13 $ SMLSIZ
14 * ..
15 * .. Array Arguments ..
16 INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
17 $ K( * ), PERM( LDGCOL, * )
18 DOUBLE PRECISION C( * ), DIFL( LDU, * ), DIFR( LDU, * ),
19 $ GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ),
20 $ S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * )
21 COMPLEX*16 B( LDB, * ), BX( LDBX, * )
22 * ..
23 *
24 * Purpose
25 * =======
26 *
27 * ZLALSA is an itermediate step in solving the least squares problem
28 * by computing the SVD of the coefficient matrix in compact form (The
29 * singular vectors are computed as products of simple orthorgonal
30 * matrices.).
31 *
32 * If ICOMPQ = 0, ZLALSA applies the inverse of the left singular vector
33 * matrix of an upper bidiagonal matrix to the right hand side; and if
34 * ICOMPQ = 1, ZLALSA applies the right singular vector matrix to the
35 * right hand side. The singular vector matrices were generated in
36 * compact form by ZLALSA.
37 *
38 * Arguments
39 * =========
40 *
41 * ICOMPQ (input) INTEGER
42 * Specifies whether the left or the right singular vector
43 * matrix is involved.
44 * = 0: Left singular vector matrix
45 * = 1: Right singular vector matrix
46 *
47 * SMLSIZ (input) INTEGER
48 * The maximum size of the subproblems at the bottom of the
49 * computation tree.
50 *
51 * N (input) INTEGER
52 * The row and column dimensions of the upper bidiagonal matrix.
53 *
54 * NRHS (input) INTEGER
55 * The number of columns of B and BX. NRHS must be at least 1.
56 *
57 * B (input/output) COMPLEX*16 array, dimension ( LDB, NRHS )
58 * On input, B contains the right hand sides of the least
59 * squares problem in rows 1 through M.
60 * On output, B contains the solution X in rows 1 through N.
61 *
62 * LDB (input) INTEGER
63 * The leading dimension of B in the calling subprogram.
64 * LDB must be at least max(1,MAX( M, N ) ).
65 *
66 * BX (output) COMPLEX*16 array, dimension ( LDBX, NRHS )
67 * On exit, the result of applying the left or right singular
68 * vector matrix to B.
69 *
70 * LDBX (input) INTEGER
71 * The leading dimension of BX.
72 *
73 * U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ).
74 * On entry, U contains the left singular vector matrices of all
75 * subproblems at the bottom level.
76 *
77 * LDU (input) INTEGER, LDU = > N.
78 * The leading dimension of arrays U, VT, DIFL, DIFR,
79 * POLES, GIVNUM, and Z.
80 *
81 * VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ).
82 * On entry, VT**H contains the right singular vector matrices of
83 * all subproblems at the bottom level.
84 *
85 * K (input) INTEGER array, dimension ( N ).
86 *
87 * DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
88 * where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
89 *
90 * DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
91 * On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
92 * distances between singular values on the I-th level and
93 * singular values on the (I -1)-th level, and DIFR(*, 2 * I)
94 * record the normalizing factors of the right singular vectors
95 * matrices of subproblems on I-th level.
96 *
97 * Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
98 * On entry, Z(1, I) contains the components of the deflation-
99 * adjusted updating row vector for subproblems on the I-th
100 * level.
101 *
102 * POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
103 * On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
104 * singular values involved in the secular equations on the I-th
105 * level.
106 *
107 * GIVPTR (input) INTEGER array, dimension ( N ).
108 * On entry, GIVPTR( I ) records the number of Givens
109 * rotations performed on the I-th problem on the computation
110 * tree.
111 *
112 * GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
113 * On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
114 * locations of Givens rotations performed on the I-th level on
115 * the computation tree.
116 *
117 * LDGCOL (input) INTEGER, LDGCOL = > N.
118 * The leading dimension of arrays GIVCOL and PERM.
119 *
120 * PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ).
121 * On entry, PERM(*, I) records permutations done on the I-th
122 * level of the computation tree.
123 *
124 * GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
125 * On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
126 * values of Givens rotations performed on the I-th level on the
127 * computation tree.
128 *
129 * C (input) DOUBLE PRECISION array, dimension ( N ).
130 * On entry, if the I-th subproblem is not square,
131 * C( I ) contains the C-value of a Givens rotation related to
132 * the right null space of the I-th subproblem.
133 *
134 * S (input) DOUBLE PRECISION array, dimension ( N ).
135 * On entry, if the I-th subproblem is not square,
136 * S( I ) contains the S-value of a Givens rotation related to
137 * the right null space of the I-th subproblem.
138 *
139 * RWORK (workspace) DOUBLE PRECISION array, dimension at least
140 * MAX( (SMLSZ+1)*NRHS*3, N*(1+NRHS) + 2*NRHS ).
141 *
142 * IWORK (workspace) INTEGER array.
143 * The dimension must be at least 3 * N
144 *
145 * INFO (output) INTEGER
146 * = 0: successful exit.
147 * < 0: if INFO = -i, the i-th argument had an illegal value.
148 *
149 * Further Details
150 * ===============
151 *
152 * Based on contributions by
153 * Ming Gu and Ren-Cang Li, Computer Science Division, University of
154 * California at Berkeley, USA
155 * Osni Marques, LBNL/NERSC, USA
156 *
157 * =====================================================================
158 *
159 * .. Parameters ..
160 DOUBLE PRECISION ZERO, ONE
161 PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
162 * ..
163 * .. Local Scalars ..
164 INTEGER I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL,
165 $ JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML,
166 $ NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE
167 * ..
168 * .. External Subroutines ..
169 EXTERNAL DGEMM, DLASDT, XERBLA, ZCOPY, ZLALS0
170 * ..
171 * .. Intrinsic Functions ..
172 INTRINSIC DBLE, DCMPLX, DIMAG
173 * ..
174 * .. Executable Statements ..
175 *
176 * Test the input parameters.
177 *
178 INFO = 0
179 *
180 IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
181 INFO = -1
182 ELSE IF( SMLSIZ.LT.3 ) THEN
183 INFO = -2
184 ELSE IF( N.LT.SMLSIZ ) THEN
185 INFO = -3
186 ELSE IF( NRHS.LT.1 ) THEN
187 INFO = -4
188 ELSE IF( LDB.LT.N ) THEN
189 INFO = -6
190 ELSE IF( LDBX.LT.N ) THEN
191 INFO = -8
192 ELSE IF( LDU.LT.N ) THEN
193 INFO = -10
194 ELSE IF( LDGCOL.LT.N ) THEN
195 INFO = -19
196 END IF
197 IF( INFO.NE.0 ) THEN
198 CALL XERBLA( 'ZLALSA', -INFO )
199 RETURN
200 END IF
201 *
202 * Book-keeping and setting up the computation tree.
203 *
204 INODE = 1
205 NDIML = INODE + N
206 NDIMR = NDIML + N
207 *
208 CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
209 $ IWORK( NDIMR ), SMLSIZ )
210 *
211 * The following code applies back the left singular vector factors.
212 * For applying back the right singular vector factors, go to 170.
213 *
214 IF( ICOMPQ.EQ.1 ) THEN
215 GO TO 170
216 END IF
217 *
218 * The nodes on the bottom level of the tree were solved
219 * by DLASDQ. The corresponding left and right singular vector
220 * matrices are in explicit form. First apply back the left
221 * singular vector matrices.
222 *
223 NDB1 = ( ND+1 ) / 2
224 DO 130 I = NDB1, ND
225 *
226 * IC : center row of each node
227 * NL : number of rows of left subproblem
228 * NR : number of rows of right subproblem
229 * NLF: starting row of the left subproblem
230 * NRF: starting row of the right subproblem
231 *
232 I1 = I - 1
233 IC = IWORK( INODE+I1 )
234 NL = IWORK( NDIML+I1 )
235 NR = IWORK( NDIMR+I1 )
236 NLF = IC - NL
237 NRF = IC + 1
238 *
239 * Since B and BX are complex, the following call to DGEMM
240 * is performed in two steps (real and imaginary parts).
241 *
242 * CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
243 * $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
244 *
245 J = NL*NRHS*2
246 DO 20 JCOL = 1, NRHS
247 DO 10 JROW = NLF, NLF + NL - 1
248 J = J + 1
249 RWORK( J ) = DBLE( B( JROW, JCOL ) )
250 10 CONTINUE
251 20 CONTINUE
252 CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
253 $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL )
254 J = NL*NRHS*2
255 DO 40 JCOL = 1, NRHS
256 DO 30 JROW = NLF, NLF + NL - 1
257 J = J + 1
258 RWORK( J ) = DIMAG( B( JROW, JCOL ) )
259 30 CONTINUE
260 40 CONTINUE
261 CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
262 $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ),
263 $ NL )
264 JREAL = 0
265 JIMAG = NL*NRHS
266 DO 60 JCOL = 1, NRHS
267 DO 50 JROW = NLF, NLF + NL - 1
268 JREAL = JREAL + 1
269 JIMAG = JIMAG + 1
270 BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
271 $ RWORK( JIMAG ) )
272 50 CONTINUE
273 60 CONTINUE
274 *
275 * Since B and BX are complex, the following call to DGEMM
276 * is performed in two steps (real and imaginary parts).
277 *
278 * CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
279 * $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
280 *
281 J = NR*NRHS*2
282 DO 80 JCOL = 1, NRHS
283 DO 70 JROW = NRF, NRF + NR - 1
284 J = J + 1
285 RWORK( J ) = DBLE( B( JROW, JCOL ) )
286 70 CONTINUE
287 80 CONTINUE
288 CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
289 $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR )
290 J = NR*NRHS*2
291 DO 100 JCOL = 1, NRHS
292 DO 90 JROW = NRF, NRF + NR - 1
293 J = J + 1
294 RWORK( J ) = DIMAG( B( JROW, JCOL ) )
295 90 CONTINUE
296 100 CONTINUE
297 CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
298 $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ),
299 $ NR )
300 JREAL = 0
301 JIMAG = NR*NRHS
302 DO 120 JCOL = 1, NRHS
303 DO 110 JROW = NRF, NRF + NR - 1
304 JREAL = JREAL + 1
305 JIMAG = JIMAG + 1
306 BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
307 $ RWORK( JIMAG ) )
308 110 CONTINUE
309 120 CONTINUE
310 *
311 130 CONTINUE
312 *
313 * Next copy the rows of B that correspond to unchanged rows
314 * in the bidiagonal matrix to BX.
315 *
316 DO 140 I = 1, ND
317 IC = IWORK( INODE+I-1 )
318 CALL ZCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
319 140 CONTINUE
320 *
321 * Finally go through the left singular vector matrices of all
322 * the other subproblems bottom-up on the tree.
323 *
324 J = 2**NLVL
325 SQRE = 0
326 *
327 DO 160 LVL = NLVL, 1, -1
328 LVL2 = 2*LVL - 1
329 *
330 * find the first node LF and last node LL on
331 * the current level LVL
332 *
333 IF( LVL.EQ.1 ) THEN
334 LF = 1
335 LL = 1
336 ELSE
337 LF = 2**( LVL-1 )
338 LL = 2*LF - 1
339 END IF
340 DO 150 I = LF, LL
341 IM1 = I - 1
342 IC = IWORK( INODE+IM1 )
343 NL = IWORK( NDIML+IM1 )
344 NR = IWORK( NDIMR+IM1 )
345 NLF = IC - NL
346 NRF = IC + 1
347 J = J - 1
348 CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
349 $ B( NLF, 1 ), LDB, PERM( NLF, LVL ),
350 $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
351 $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
352 $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
353 $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
354 $ INFO )
355 150 CONTINUE
356 160 CONTINUE
357 GO TO 330
358 *
359 * ICOMPQ = 1: applying back the right singular vector factors.
360 *
361 170 CONTINUE
362 *
363 * First now go through the right singular vector matrices of all
364 * the tree nodes top-down.
365 *
366 J = 0
367 DO 190 LVL = 1, NLVL
368 LVL2 = 2*LVL - 1
369 *
370 * Find the first node LF and last node LL on
371 * the current level LVL.
372 *
373 IF( LVL.EQ.1 ) THEN
374 LF = 1
375 LL = 1
376 ELSE
377 LF = 2**( LVL-1 )
378 LL = 2*LF - 1
379 END IF
380 DO 180 I = LL, LF, -1
381 IM1 = I - 1
382 IC = IWORK( INODE+IM1 )
383 NL = IWORK( NDIML+IM1 )
384 NR = IWORK( NDIMR+IM1 )
385 NLF = IC - NL
386 NRF = IC + 1
387 IF( I.EQ.LL ) THEN
388 SQRE = 0
389 ELSE
390 SQRE = 1
391 END IF
392 J = J + 1
393 CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
394 $ BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
395 $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
396 $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
397 $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
398 $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
399 $ INFO )
400 180 CONTINUE
401 190 CONTINUE
402 *
403 * The nodes on the bottom level of the tree were solved
404 * by DLASDQ. The corresponding right singular vector
405 * matrices are in explicit form. Apply them back.
406 *
407 NDB1 = ( ND+1 ) / 2
408 DO 320 I = NDB1, ND
409 I1 = I - 1
410 IC = IWORK( INODE+I1 )
411 NL = IWORK( NDIML+I1 )
412 NR = IWORK( NDIMR+I1 )
413 NLP1 = NL + 1
414 IF( I.EQ.ND ) THEN
415 NRP1 = NR
416 ELSE
417 NRP1 = NR + 1
418 END IF
419 NLF = IC - NL
420 NRF = IC + 1
421 *
422 * Since B and BX are complex, the following call to DGEMM is
423 * performed in two steps (real and imaginary parts).
424 *
425 * CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
426 * $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
427 *
428 J = NLP1*NRHS*2
429 DO 210 JCOL = 1, NRHS
430 DO 200 JROW = NLF, NLF + NLP1 - 1
431 J = J + 1
432 RWORK( J ) = DBLE( B( JROW, JCOL ) )
433 200 CONTINUE
434 210 CONTINUE
435 CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
436 $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ),
437 $ NLP1 )
438 J = NLP1*NRHS*2
439 DO 230 JCOL = 1, NRHS
440 DO 220 JROW = NLF, NLF + NLP1 - 1
441 J = J + 1
442 RWORK( J ) = DIMAG( B( JROW, JCOL ) )
443 220 CONTINUE
444 230 CONTINUE
445 CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
446 $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO,
447 $ RWORK( 1+NLP1*NRHS ), NLP1 )
448 JREAL = 0
449 JIMAG = NLP1*NRHS
450 DO 250 JCOL = 1, NRHS
451 DO 240 JROW = NLF, NLF + NLP1 - 1
452 JREAL = JREAL + 1
453 JIMAG = JIMAG + 1
454 BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
455 $ RWORK( JIMAG ) )
456 240 CONTINUE
457 250 CONTINUE
458 *
459 * Since B and BX are complex, the following call to DGEMM is
460 * performed in two steps (real and imaginary parts).
461 *
462 * CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
463 * $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
464 *
465 J = NRP1*NRHS*2
466 DO 270 JCOL = 1, NRHS
467 DO 260 JROW = NRF, NRF + NRP1 - 1
468 J = J + 1
469 RWORK( J ) = DBLE( B( JROW, JCOL ) )
470 260 CONTINUE
471 270 CONTINUE
472 CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
473 $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ),
474 $ NRP1 )
475 J = NRP1*NRHS*2
476 DO 290 JCOL = 1, NRHS
477 DO 280 JROW = NRF, NRF + NRP1 - 1
478 J = J + 1
479 RWORK( J ) = DIMAG( B( JROW, JCOL ) )
480 280 CONTINUE
481 290 CONTINUE
482 CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
483 $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO,
484 $ RWORK( 1+NRP1*NRHS ), NRP1 )
485 JREAL = 0
486 JIMAG = NRP1*NRHS
487 DO 310 JCOL = 1, NRHS
488 DO 300 JROW = NRF, NRF + NRP1 - 1
489 JREAL = JREAL + 1
490 JIMAG = JIMAG + 1
491 BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
492 $ RWORK( JIMAG ) )
493 300 CONTINUE
494 310 CONTINUE
495 *
496 320 CONTINUE
497 *
498 330 CONTINUE
499 *
500 RETURN
501 *
502 * End of ZLALSA
503 *
504 END
2 $ LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
3 $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK,
4 $ IWORK, INFO )
5 *
6 * -- LAPACK routine (version 3.2) --
7 * -- LAPACK is a software package provided by Univ. of Tennessee, --
8 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
9 * November 2006
10 *
11 * .. Scalar Arguments ..
12 INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
13 $ SMLSIZ
14 * ..
15 * .. Array Arguments ..
16 INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
17 $ K( * ), PERM( LDGCOL, * )
18 DOUBLE PRECISION C( * ), DIFL( LDU, * ), DIFR( LDU, * ),
19 $ GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ),
20 $ S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * )
21 COMPLEX*16 B( LDB, * ), BX( LDBX, * )
22 * ..
23 *
24 * Purpose
25 * =======
26 *
27 * ZLALSA is an itermediate step in solving the least squares problem
28 * by computing the SVD of the coefficient matrix in compact form (The
29 * singular vectors are computed as products of simple orthorgonal
30 * matrices.).
31 *
32 * If ICOMPQ = 0, ZLALSA applies the inverse of the left singular vector
33 * matrix of an upper bidiagonal matrix to the right hand side; and if
34 * ICOMPQ = 1, ZLALSA applies the right singular vector matrix to the
35 * right hand side. The singular vector matrices were generated in
36 * compact form by ZLALSA.
37 *
38 * Arguments
39 * =========
40 *
41 * ICOMPQ (input) INTEGER
42 * Specifies whether the left or the right singular vector
43 * matrix is involved.
44 * = 0: Left singular vector matrix
45 * = 1: Right singular vector matrix
46 *
47 * SMLSIZ (input) INTEGER
48 * The maximum size of the subproblems at the bottom of the
49 * computation tree.
50 *
51 * N (input) INTEGER
52 * The row and column dimensions of the upper bidiagonal matrix.
53 *
54 * NRHS (input) INTEGER
55 * The number of columns of B and BX. NRHS must be at least 1.
56 *
57 * B (input/output) COMPLEX*16 array, dimension ( LDB, NRHS )
58 * On input, B contains the right hand sides of the least
59 * squares problem in rows 1 through M.
60 * On output, B contains the solution X in rows 1 through N.
61 *
62 * LDB (input) INTEGER
63 * The leading dimension of B in the calling subprogram.
64 * LDB must be at least max(1,MAX( M, N ) ).
65 *
66 * BX (output) COMPLEX*16 array, dimension ( LDBX, NRHS )
67 * On exit, the result of applying the left or right singular
68 * vector matrix to B.
69 *
70 * LDBX (input) INTEGER
71 * The leading dimension of BX.
72 *
73 * U (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ).
74 * On entry, U contains the left singular vector matrices of all
75 * subproblems at the bottom level.
76 *
77 * LDU (input) INTEGER, LDU = > N.
78 * The leading dimension of arrays U, VT, DIFL, DIFR,
79 * POLES, GIVNUM, and Z.
80 *
81 * VT (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ).
82 * On entry, VT**H contains the right singular vector matrices of
83 * all subproblems at the bottom level.
84 *
85 * K (input) INTEGER array, dimension ( N ).
86 *
87 * DIFL (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
88 * where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
89 *
90 * DIFR (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
91 * On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
92 * distances between singular values on the I-th level and
93 * singular values on the (I -1)-th level, and DIFR(*, 2 * I)
94 * record the normalizing factors of the right singular vectors
95 * matrices of subproblems on I-th level.
96 *
97 * Z (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
98 * On entry, Z(1, I) contains the components of the deflation-
99 * adjusted updating row vector for subproblems on the I-th
100 * level.
101 *
102 * POLES (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
103 * On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
104 * singular values involved in the secular equations on the I-th
105 * level.
106 *
107 * GIVPTR (input) INTEGER array, dimension ( N ).
108 * On entry, GIVPTR( I ) records the number of Givens
109 * rotations performed on the I-th problem on the computation
110 * tree.
111 *
112 * GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
113 * On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
114 * locations of Givens rotations performed on the I-th level on
115 * the computation tree.
116 *
117 * LDGCOL (input) INTEGER, LDGCOL = > N.
118 * The leading dimension of arrays GIVCOL and PERM.
119 *
120 * PERM (input) INTEGER array, dimension ( LDGCOL, NLVL ).
121 * On entry, PERM(*, I) records permutations done on the I-th
122 * level of the computation tree.
123 *
124 * GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
125 * On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
126 * values of Givens rotations performed on the I-th level on the
127 * computation tree.
128 *
129 * C (input) DOUBLE PRECISION array, dimension ( N ).
130 * On entry, if the I-th subproblem is not square,
131 * C( I ) contains the C-value of a Givens rotation related to
132 * the right null space of the I-th subproblem.
133 *
134 * S (input) DOUBLE PRECISION array, dimension ( N ).
135 * On entry, if the I-th subproblem is not square,
136 * S( I ) contains the S-value of a Givens rotation related to
137 * the right null space of the I-th subproblem.
138 *
139 * RWORK (workspace) DOUBLE PRECISION array, dimension at least
140 * MAX( (SMLSZ+1)*NRHS*3, N*(1+NRHS) + 2*NRHS ).
141 *
142 * IWORK (workspace) INTEGER array.
143 * The dimension must be at least 3 * N
144 *
145 * INFO (output) INTEGER
146 * = 0: successful exit.
147 * < 0: if INFO = -i, the i-th argument had an illegal value.
148 *
149 * Further Details
150 * ===============
151 *
152 * Based on contributions by
153 * Ming Gu and Ren-Cang Li, Computer Science Division, University of
154 * California at Berkeley, USA
155 * Osni Marques, LBNL/NERSC, USA
156 *
157 * =====================================================================
158 *
159 * .. Parameters ..
160 DOUBLE PRECISION ZERO, ONE
161 PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
162 * ..
163 * .. Local Scalars ..
164 INTEGER I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL,
165 $ JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML,
166 $ NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE
167 * ..
168 * .. External Subroutines ..
169 EXTERNAL DGEMM, DLASDT, XERBLA, ZCOPY, ZLALS0
170 * ..
171 * .. Intrinsic Functions ..
172 INTRINSIC DBLE, DCMPLX, DIMAG
173 * ..
174 * .. Executable Statements ..
175 *
176 * Test the input parameters.
177 *
178 INFO = 0
179 *
180 IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
181 INFO = -1
182 ELSE IF( SMLSIZ.LT.3 ) THEN
183 INFO = -2
184 ELSE IF( N.LT.SMLSIZ ) THEN
185 INFO = -3
186 ELSE IF( NRHS.LT.1 ) THEN
187 INFO = -4
188 ELSE IF( LDB.LT.N ) THEN
189 INFO = -6
190 ELSE IF( LDBX.LT.N ) THEN
191 INFO = -8
192 ELSE IF( LDU.LT.N ) THEN
193 INFO = -10
194 ELSE IF( LDGCOL.LT.N ) THEN
195 INFO = -19
196 END IF
197 IF( INFO.NE.0 ) THEN
198 CALL XERBLA( 'ZLALSA', -INFO )
199 RETURN
200 END IF
201 *
202 * Book-keeping and setting up the computation tree.
203 *
204 INODE = 1
205 NDIML = INODE + N
206 NDIMR = NDIML + N
207 *
208 CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
209 $ IWORK( NDIMR ), SMLSIZ )
210 *
211 * The following code applies back the left singular vector factors.
212 * For applying back the right singular vector factors, go to 170.
213 *
214 IF( ICOMPQ.EQ.1 ) THEN
215 GO TO 170
216 END IF
217 *
218 * The nodes on the bottom level of the tree were solved
219 * by DLASDQ. The corresponding left and right singular vector
220 * matrices are in explicit form. First apply back the left
221 * singular vector matrices.
222 *
223 NDB1 = ( ND+1 ) / 2
224 DO 130 I = NDB1, ND
225 *
226 * IC : center row of each node
227 * NL : number of rows of left subproblem
228 * NR : number of rows of right subproblem
229 * NLF: starting row of the left subproblem
230 * NRF: starting row of the right subproblem
231 *
232 I1 = I - 1
233 IC = IWORK( INODE+I1 )
234 NL = IWORK( NDIML+I1 )
235 NR = IWORK( NDIMR+I1 )
236 NLF = IC - NL
237 NRF = IC + 1
238 *
239 * Since B and BX are complex, the following call to DGEMM
240 * is performed in two steps (real and imaginary parts).
241 *
242 * CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
243 * $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
244 *
245 J = NL*NRHS*2
246 DO 20 JCOL = 1, NRHS
247 DO 10 JROW = NLF, NLF + NL - 1
248 J = J + 1
249 RWORK( J ) = DBLE( B( JROW, JCOL ) )
250 10 CONTINUE
251 20 CONTINUE
252 CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
253 $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL )
254 J = NL*NRHS*2
255 DO 40 JCOL = 1, NRHS
256 DO 30 JROW = NLF, NLF + NL - 1
257 J = J + 1
258 RWORK( J ) = DIMAG( B( JROW, JCOL ) )
259 30 CONTINUE
260 40 CONTINUE
261 CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
262 $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ),
263 $ NL )
264 JREAL = 0
265 JIMAG = NL*NRHS
266 DO 60 JCOL = 1, NRHS
267 DO 50 JROW = NLF, NLF + NL - 1
268 JREAL = JREAL + 1
269 JIMAG = JIMAG + 1
270 BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
271 $ RWORK( JIMAG ) )
272 50 CONTINUE
273 60 CONTINUE
274 *
275 * Since B and BX are complex, the following call to DGEMM
276 * is performed in two steps (real and imaginary parts).
277 *
278 * CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
279 * $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
280 *
281 J = NR*NRHS*2
282 DO 80 JCOL = 1, NRHS
283 DO 70 JROW = NRF, NRF + NR - 1
284 J = J + 1
285 RWORK( J ) = DBLE( B( JROW, JCOL ) )
286 70 CONTINUE
287 80 CONTINUE
288 CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
289 $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR )
290 J = NR*NRHS*2
291 DO 100 JCOL = 1, NRHS
292 DO 90 JROW = NRF, NRF + NR - 1
293 J = J + 1
294 RWORK( J ) = DIMAG( B( JROW, JCOL ) )
295 90 CONTINUE
296 100 CONTINUE
297 CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
298 $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ),
299 $ NR )
300 JREAL = 0
301 JIMAG = NR*NRHS
302 DO 120 JCOL = 1, NRHS
303 DO 110 JROW = NRF, NRF + NR - 1
304 JREAL = JREAL + 1
305 JIMAG = JIMAG + 1
306 BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
307 $ RWORK( JIMAG ) )
308 110 CONTINUE
309 120 CONTINUE
310 *
311 130 CONTINUE
312 *
313 * Next copy the rows of B that correspond to unchanged rows
314 * in the bidiagonal matrix to BX.
315 *
316 DO 140 I = 1, ND
317 IC = IWORK( INODE+I-1 )
318 CALL ZCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
319 140 CONTINUE
320 *
321 * Finally go through the left singular vector matrices of all
322 * the other subproblems bottom-up on the tree.
323 *
324 J = 2**NLVL
325 SQRE = 0
326 *
327 DO 160 LVL = NLVL, 1, -1
328 LVL2 = 2*LVL - 1
329 *
330 * find the first node LF and last node LL on
331 * the current level LVL
332 *
333 IF( LVL.EQ.1 ) THEN
334 LF = 1
335 LL = 1
336 ELSE
337 LF = 2**( LVL-1 )
338 LL = 2*LF - 1
339 END IF
340 DO 150 I = LF, LL
341 IM1 = I - 1
342 IC = IWORK( INODE+IM1 )
343 NL = IWORK( NDIML+IM1 )
344 NR = IWORK( NDIMR+IM1 )
345 NLF = IC - NL
346 NRF = IC + 1
347 J = J - 1
348 CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
349 $ B( NLF, 1 ), LDB, PERM( NLF, LVL ),
350 $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
351 $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
352 $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
353 $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
354 $ INFO )
355 150 CONTINUE
356 160 CONTINUE
357 GO TO 330
358 *
359 * ICOMPQ = 1: applying back the right singular vector factors.
360 *
361 170 CONTINUE
362 *
363 * First now go through the right singular vector matrices of all
364 * the tree nodes top-down.
365 *
366 J = 0
367 DO 190 LVL = 1, NLVL
368 LVL2 = 2*LVL - 1
369 *
370 * Find the first node LF and last node LL on
371 * the current level LVL.
372 *
373 IF( LVL.EQ.1 ) THEN
374 LF = 1
375 LL = 1
376 ELSE
377 LF = 2**( LVL-1 )
378 LL = 2*LF - 1
379 END IF
380 DO 180 I = LL, LF, -1
381 IM1 = I - 1
382 IC = IWORK( INODE+IM1 )
383 NL = IWORK( NDIML+IM1 )
384 NR = IWORK( NDIMR+IM1 )
385 NLF = IC - NL
386 NRF = IC + 1
387 IF( I.EQ.LL ) THEN
388 SQRE = 0
389 ELSE
390 SQRE = 1
391 END IF
392 J = J + 1
393 CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
394 $ BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
395 $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
396 $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
397 $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
398 $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
399 $ INFO )
400 180 CONTINUE
401 190 CONTINUE
402 *
403 * The nodes on the bottom level of the tree were solved
404 * by DLASDQ. The corresponding right singular vector
405 * matrices are in explicit form. Apply them back.
406 *
407 NDB1 = ( ND+1 ) / 2
408 DO 320 I = NDB1, ND
409 I1 = I - 1
410 IC = IWORK( INODE+I1 )
411 NL = IWORK( NDIML+I1 )
412 NR = IWORK( NDIMR+I1 )
413 NLP1 = NL + 1
414 IF( I.EQ.ND ) THEN
415 NRP1 = NR
416 ELSE
417 NRP1 = NR + 1
418 END IF
419 NLF = IC - NL
420 NRF = IC + 1
421 *
422 * Since B and BX are complex, the following call to DGEMM is
423 * performed in two steps (real and imaginary parts).
424 *
425 * CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
426 * $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
427 *
428 J = NLP1*NRHS*2
429 DO 210 JCOL = 1, NRHS
430 DO 200 JROW = NLF, NLF + NLP1 - 1
431 J = J + 1
432 RWORK( J ) = DBLE( B( JROW, JCOL ) )
433 200 CONTINUE
434 210 CONTINUE
435 CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
436 $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ),
437 $ NLP1 )
438 J = NLP1*NRHS*2
439 DO 230 JCOL = 1, NRHS
440 DO 220 JROW = NLF, NLF + NLP1 - 1
441 J = J + 1
442 RWORK( J ) = DIMAG( B( JROW, JCOL ) )
443 220 CONTINUE
444 230 CONTINUE
445 CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
446 $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO,
447 $ RWORK( 1+NLP1*NRHS ), NLP1 )
448 JREAL = 0
449 JIMAG = NLP1*NRHS
450 DO 250 JCOL = 1, NRHS
451 DO 240 JROW = NLF, NLF + NLP1 - 1
452 JREAL = JREAL + 1
453 JIMAG = JIMAG + 1
454 BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
455 $ RWORK( JIMAG ) )
456 240 CONTINUE
457 250 CONTINUE
458 *
459 * Since B and BX are complex, the following call to DGEMM is
460 * performed in two steps (real and imaginary parts).
461 *
462 * CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
463 * $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
464 *
465 J = NRP1*NRHS*2
466 DO 270 JCOL = 1, NRHS
467 DO 260 JROW = NRF, NRF + NRP1 - 1
468 J = J + 1
469 RWORK( J ) = DBLE( B( JROW, JCOL ) )
470 260 CONTINUE
471 270 CONTINUE
472 CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
473 $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ),
474 $ NRP1 )
475 J = NRP1*NRHS*2
476 DO 290 JCOL = 1, NRHS
477 DO 280 JROW = NRF, NRF + NRP1 - 1
478 J = J + 1
479 RWORK( J ) = DIMAG( B( JROW, JCOL ) )
480 280 CONTINUE
481 290 CONTINUE
482 CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
483 $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO,
484 $ RWORK( 1+NRP1*NRHS ), NRP1 )
485 JREAL = 0
486 JIMAG = NRP1*NRHS
487 DO 310 JCOL = 1, NRHS
488 DO 300 JROW = NRF, NRF + NRP1 - 1
489 JREAL = JREAL + 1
490 JIMAG = JIMAG + 1
491 BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
492 $ RWORK( JIMAG ) )
493 300 CONTINUE
494 310 CONTINUE
495 *
496 320 CONTINUE
497 *
498 330 CONTINUE
499 *
500 RETURN
501 *
502 * End of ZLALSA
503 *
504 END