1       SUBROUTINE ZLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
  2      $                   LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
  3      $                   GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK,
  4      $                   IWORK, INFO )
  5 *
  6 *  -- LAPACK routine (version 3.2) --
  7 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  8 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  9 *     November 2006
 10 *
 11 *     .. Scalar Arguments ..
 12       INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
 13      $                   SMLSIZ
 14 *     ..
 15 *     .. Array Arguments ..
 16       INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
 17      $                   K( * ), PERM( LDGCOL, * )
 18       DOUBLE PRECISION   C( * ), DIFL( LDU, * ), DIFR( LDU, * ),
 19      $                   GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ),
 20      $                   S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * )
 21       COMPLEX*16         B( LDB, * ), BX( LDBX, * )
 22 *     ..
 23 *
 24 *  Purpose
 25 *  =======
 26 *
 27 *  ZLALSA is an itermediate step in solving the least squares problem
 28 *  by computing the SVD of the coefficient matrix in compact form (The
 29 *  singular vectors are computed as products of simple orthorgonal
 30 *  matrices.).
 31 *
 32 *  If ICOMPQ = 0, ZLALSA applies the inverse of the left singular vector
 33 *  matrix of an upper bidiagonal matrix to the right hand side; and if
 34 *  ICOMPQ = 1, ZLALSA applies the right singular vector matrix to the
 35 *  right hand side. The singular vector matrices were generated in
 36 *  compact form by ZLALSA.
 37 *
 38 *  Arguments
 39 *  =========
 40 *
 41 *  ICOMPQ (input) INTEGER
 42 *         Specifies whether the left or the right singular vector
 43 *         matrix is involved.
 44 *         = 0: Left singular vector matrix
 45 *         = 1: Right singular vector matrix
 46 *
 47 *  SMLSIZ (input) INTEGER
 48 *         The maximum size of the subproblems at the bottom of the
 49 *         computation tree.
 50 *
 51 *  N      (input) INTEGER
 52 *         The row and column dimensions of the upper bidiagonal matrix.
 53 *
 54 *  NRHS   (input) INTEGER
 55 *         The number of columns of B and BX. NRHS must be at least 1.
 56 *
 57 *  B      (input/output) COMPLEX*16 array, dimension ( LDB, NRHS )
 58 *         On input, B contains the right hand sides of the least
 59 *         squares problem in rows 1 through M.
 60 *         On output, B contains the solution X in rows 1 through N.
 61 *
 62 *  LDB    (input) INTEGER
 63 *         The leading dimension of B in the calling subprogram.
 64 *         LDB must be at least max(1,MAX( M, N ) ).
 65 *
 66 *  BX     (output) COMPLEX*16 array, dimension ( LDBX, NRHS )
 67 *         On exit, the result of applying the left or right singular
 68 *         vector matrix to B.
 69 *
 70 *  LDBX   (input) INTEGER
 71 *         The leading dimension of BX.
 72 *
 73 *  U      (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ).
 74 *         On entry, U contains the left singular vector matrices of all
 75 *         subproblems at the bottom level.
 76 *
 77 *  LDU    (input) INTEGER, LDU = > N.
 78 *         The leading dimension of arrays U, VT, DIFL, DIFR,
 79 *         POLES, GIVNUM, and Z.
 80 *
 81 *  VT     (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ).
 82 *         On entry, VT**H contains the right singular vector matrices of
 83 *         all subproblems at the bottom level.
 84 *
 85 *  K      (input) INTEGER array, dimension ( N ).
 86 *
 87 *  DIFL   (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
 88 *         where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
 89 *
 90 *  DIFR   (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
 91 *         On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
 92 *         distances between singular values on the I-th level and
 93 *         singular values on the (I -1)-th level, and DIFR(*, 2 * I)
 94 *         record the normalizing factors of the right singular vectors
 95 *         matrices of subproblems on I-th level.
 96 *
 97 *  Z      (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
 98 *         On entry, Z(1, I) contains the components of the deflation-
 99 *         adjusted updating row vector for subproblems on the I-th
100 *         level.
101 *
102 *  POLES  (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
103 *         On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
104 *         singular values involved in the secular equations on the I-th
105 *         level.
106 *
107 *  GIVPTR (input) INTEGER array, dimension ( N ).
108 *         On entry, GIVPTR( I ) records the number of Givens
109 *         rotations performed on the I-th problem on the computation
110 *         tree.
111 *
112 *  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
113 *         On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
114 *         locations of Givens rotations performed on the I-th level on
115 *         the computation tree.
116 *
117 *  LDGCOL (input) INTEGER, LDGCOL = > N.
118 *         The leading dimension of arrays GIVCOL and PERM.
119 *
120 *  PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).
121 *         On entry, PERM(*, I) records permutations done on the I-th
122 *         level of the computation tree.
123 *
124 *  GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
125 *         On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
126 *         values of Givens rotations performed on the I-th level on the
127 *         computation tree.
128 *
129 *  C      (input) DOUBLE PRECISION array, dimension ( N ).
130 *         On entry, if the I-th subproblem is not square,
131 *         C( I ) contains the C-value of a Givens rotation related to
132 *         the right null space of the I-th subproblem.
133 *
134 *  S      (input) DOUBLE PRECISION array, dimension ( N ).
135 *         On entry, if the I-th subproblem is not square,
136 *         S( I ) contains the S-value of a Givens rotation related to
137 *         the right null space of the I-th subproblem.
138 *
139 *  RWORK  (workspace) DOUBLE PRECISION array, dimension at least
140 *         MAX( (SMLSZ+1)*NRHS*3, N*(1+NRHS) + 2*NRHS ).
141 *
142 *  IWORK  (workspace) INTEGER array.
143 *         The dimension must be at least 3 * N
144 *
145 *  INFO   (output) INTEGER
146 *          = 0:  successful exit.
147 *          < 0:  if INFO = -i, the i-th argument had an illegal value.
148 *
149 *  Further Details
150 *  ===============
151 *
152 *  Based on contributions by
153 *     Ming Gu and Ren-Cang Li, Computer Science Division, University of
154 *       California at Berkeley, USA
155 *     Osni Marques, LBNL/NERSC, USA
156 *
157 *  =====================================================================
158 *
159 *     .. Parameters ..
160       DOUBLE PRECISION   ZERO, ONE
161       PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
162 *     ..
163 *     .. Local Scalars ..
164       INTEGER            I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL,
165      $                   JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML,
166      $                   NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE
167 *     ..
168 *     .. External Subroutines ..
169       EXTERNAL           DGEMM, DLASDT, XERBLA, ZCOPY, ZLALS0
170 *     ..
171 *     .. Intrinsic Functions ..
172       INTRINSIC          DBLEDCMPLXDIMAG
173 *     ..
174 *     .. Executable Statements ..
175 *
176 *     Test the input parameters.
177 *
178       INFO = 0
179 *
180       IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
181          INFO = -1
182       ELSE IF( SMLSIZ.LT.3 ) THEN
183          INFO = -2
184       ELSE IF( N.LT.SMLSIZ ) THEN
185          INFO = -3
186       ELSE IF( NRHS.LT.1 ) THEN
187          INFO = -4
188       ELSE IF( LDB.LT.N ) THEN
189          INFO = -6
190       ELSE IF( LDBX.LT.N ) THEN
191          INFO = -8
192       ELSE IF( LDU.LT.N ) THEN
193          INFO = -10
194       ELSE IF( LDGCOL.LT.N ) THEN
195          INFO = -19
196       END IF
197       IF( INFO.NE.0 ) THEN
198          CALL XERBLA( 'ZLALSA'-INFO )
199          RETURN
200       END IF
201 *
202 *     Book-keeping and  setting up the computation tree.
203 *
204       INODE = 1
205       NDIML = INODE + N
206       NDIMR = NDIML + N
207 *
208       CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
209      $             IWORK( NDIMR ), SMLSIZ )
210 *
211 *     The following code applies back the left singular vector factors.
212 *     For applying back the right singular vector factors, go to 170.
213 *
214       IF( ICOMPQ.EQ.1 ) THEN
215          GO TO 170
216       END IF
217 *
218 *     The nodes on the bottom level of the tree were solved
219 *     by DLASDQ. The corresponding left and right singular vector
220 *     matrices are in explicit form. First apply back the left
221 *     singular vector matrices.
222 *
223       NDB1 = ( ND+1 ) / 2
224       DO 130 I = NDB1, ND
225 *
226 *        IC : center row of each node
227 *        NL : number of rows of left  subproblem
228 *        NR : number of rows of right subproblem
229 *        NLF: starting row of the left   subproblem
230 *        NRF: starting row of the right  subproblem
231 *
232          I1 = I - 1
233          IC = IWORK( INODE+I1 )
234          NL = IWORK( NDIML+I1 )
235          NR = IWORK( NDIMR+I1 )
236          NLF = IC - NL
237          NRF = IC + 1
238 *
239 *        Since B and BX are complex, the following call to DGEMM
240 *        is performed in two steps (real and imaginary parts).
241 *
242 *        CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
243 *     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
244 *
245          J = NL*NRHS*2
246          DO 20 JCOL = 1, NRHS
247             DO 10 JROW = NLF, NLF + NL - 1
248                J = J + 1
249                RWORK( J ) = DBLE( B( JROW, JCOL ) )
250    10       CONTINUE
251    20    CONTINUE
252          CALL DGEMM( 'T''N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
253      $               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL )
254          J = NL*NRHS*2
255          DO 40 JCOL = 1, NRHS
256             DO 30 JROW = NLF, NLF + NL - 1
257                J = J + 1
258                RWORK( J ) = DIMAG( B( JROW, JCOL ) )
259    30       CONTINUE
260    40    CONTINUE
261          CALL DGEMM( 'T''N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
262      $               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ),
263      $               NL )
264          JREAL = 0
265          JIMAG = NL*NRHS
266          DO 60 JCOL = 1, NRHS
267             DO 50 JROW = NLF, NLF + NL - 1
268                JREAL = JREAL + 1
269                JIMAG = JIMAG + 1
270                BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
271      $                            RWORK( JIMAG ) )
272    50       CONTINUE
273    60    CONTINUE
274 *
275 *        Since B and BX are complex, the following call to DGEMM
276 *        is performed in two steps (real and imaginary parts).
277 *
278 *        CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
279 *    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
280 *
281          J = NR*NRHS*2
282          DO 80 JCOL = 1, NRHS
283             DO 70 JROW = NRF, NRF + NR - 1
284                J = J + 1
285                RWORK( J ) = DBLE( B( JROW, JCOL ) )
286    70       CONTINUE
287    80    CONTINUE
288          CALL DGEMM( 'T''N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
289      $               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR )
290          J = NR*NRHS*2
291          DO 100 JCOL = 1, NRHS
292             DO 90 JROW = NRF, NRF + NR - 1
293                J = J + 1
294                RWORK( J ) = DIMAG( B( JROW, JCOL ) )
295    90       CONTINUE
296   100    CONTINUE
297          CALL DGEMM( 'T''N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
298      $               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ),
299      $               NR )
300          JREAL = 0
301          JIMAG = NR*NRHS
302          DO 120 JCOL = 1, NRHS
303             DO 110 JROW = NRF, NRF + NR - 1
304                JREAL = JREAL + 1
305                JIMAG = JIMAG + 1
306                BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
307      $                            RWORK( JIMAG ) )
308   110       CONTINUE
309   120    CONTINUE
310 *
311   130 CONTINUE
312 *
313 *     Next copy the rows of B that correspond to unchanged rows
314 *     in the bidiagonal matrix to BX.
315 *
316       DO 140 I = 1, ND
317          IC = IWORK( INODE+I-1 )
318          CALL ZCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
319   140 CONTINUE
320 *
321 *     Finally go through the left singular vector matrices of all
322 *     the other subproblems bottom-up on the tree.
323 *
324       J = 2**NLVL
325       SQRE = 0
326 *
327       DO 160 LVL = NLVL, 1-1
328          LVL2 = 2*LVL - 1
329 *
330 *        find the first node LF and last node LL on
331 *        the current level LVL
332 *
333          IF( LVL.EQ.1 ) THEN
334             LF = 1
335             LL = 1
336          ELSE
337             LF = 2**( LVL-1 )
338             LL = 2*LF - 1
339          END IF
340          DO 150 I = LF, LL
341             IM1 = I - 1
342             IC = IWORK( INODE+IM1 )
343             NL = IWORK( NDIML+IM1 )
344             NR = IWORK( NDIMR+IM1 )
345             NLF = IC - NL
346             NRF = IC + 1
347             J = J - 1
348             CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
349      $                   B( NLF, 1 ), LDB, PERM( NLF, LVL ),
350      $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
351      $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
352      $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
353      $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
354      $                   INFO )
355   150    CONTINUE
356   160 CONTINUE
357       GO TO 330
358 *
359 *     ICOMPQ = 1: applying back the right singular vector factors.
360 *
361   170 CONTINUE
362 *
363 *     First now go through the right singular vector matrices of all
364 *     the tree nodes top-down.
365 *
366       J = 0
367       DO 190 LVL = 1, NLVL
368          LVL2 = 2*LVL - 1
369 *
370 *        Find the first node LF and last node LL on
371 *        the current level LVL.
372 *
373          IF( LVL.EQ.1 ) THEN
374             LF = 1
375             LL = 1
376          ELSE
377             LF = 2**( LVL-1 )
378             LL = 2*LF - 1
379          END IF
380          DO 180 I = LL, LF, -1
381             IM1 = I - 1
382             IC = IWORK( INODE+IM1 )
383             NL = IWORK( NDIML+IM1 )
384             NR = IWORK( NDIMR+IM1 )
385             NLF = IC - NL
386             NRF = IC + 1
387             IF( I.EQ.LL ) THEN
388                SQRE = 0
389             ELSE
390                SQRE = 1
391             END IF
392             J = J + 1
393             CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
394      $                   BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
395      $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
396      $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
397      $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
398      $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
399      $                   INFO )
400   180    CONTINUE
401   190 CONTINUE
402 *
403 *     The nodes on the bottom level of the tree were solved
404 *     by DLASDQ. The corresponding right singular vector
405 *     matrices are in explicit form. Apply them back.
406 *
407       NDB1 = ( ND+1 ) / 2
408       DO 320 I = NDB1, ND
409          I1 = I - 1
410          IC = IWORK( INODE+I1 )
411          NL = IWORK( NDIML+I1 )
412          NR = IWORK( NDIMR+I1 )
413          NLP1 = NL + 1
414          IF( I.EQ.ND ) THEN
415             NRP1 = NR
416          ELSE
417             NRP1 = NR + 1
418          END IF
419          NLF = IC - NL
420          NRF = IC + 1
421 *
422 *        Since B and BX are complex, the following call to DGEMM is
423 *        performed in two steps (real and imaginary parts).
424 *
425 *        CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
426 *    $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
427 *
428          J = NLP1*NRHS*2
429          DO 210 JCOL = 1, NRHS
430             DO 200 JROW = NLF, NLF + NLP1 - 1
431                J = J + 1
432                RWORK( J ) = DBLE( B( JROW, JCOL ) )
433   200       CONTINUE
434   210    CONTINUE
435          CALL DGEMM( 'T''N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
436      $               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ),
437      $               NLP1 )
438          J = NLP1*NRHS*2
439          DO 230 JCOL = 1, NRHS
440             DO 220 JROW = NLF, NLF + NLP1 - 1
441                J = J + 1
442                RWORK( J ) = DIMAG( B( JROW, JCOL ) )
443   220       CONTINUE
444   230    CONTINUE
445          CALL DGEMM( 'T''N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
446      $               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO,
447      $               RWORK( 1+NLP1*NRHS ), NLP1 )
448          JREAL = 0
449          JIMAG = NLP1*NRHS
450          DO 250 JCOL = 1, NRHS
451             DO 240 JROW = NLF, NLF + NLP1 - 1
452                JREAL = JREAL + 1
453                JIMAG = JIMAG + 1
454                BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
455      $                            RWORK( JIMAG ) )
456   240       CONTINUE
457   250    CONTINUE
458 *
459 *        Since B and BX are complex, the following call to DGEMM is
460 *        performed in two steps (real and imaginary parts).
461 *
462 *        CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
463 *    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
464 *
465          J = NRP1*NRHS*2
466          DO 270 JCOL = 1, NRHS
467             DO 260 JROW = NRF, NRF + NRP1 - 1
468                J = J + 1
469                RWORK( J ) = DBLE( B( JROW, JCOL ) )
470   260       CONTINUE
471   270    CONTINUE
472          CALL DGEMM( 'T''N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
473      $               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ),
474      $               NRP1 )
475          J = NRP1*NRHS*2
476          DO 290 JCOL = 1, NRHS
477             DO 280 JROW = NRF, NRF + NRP1 - 1
478                J = J + 1
479                RWORK( J ) = DIMAG( B( JROW, JCOL ) )
480   280       CONTINUE
481   290    CONTINUE
482          CALL DGEMM( 'T''N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
483      $               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO,
484      $               RWORK( 1+NRP1*NRHS ), NRP1 )
485          JREAL = 0
486          JIMAG = NRP1*NRHS
487          DO 310 JCOL = 1, NRHS
488             DO 300 JROW = NRF, NRF + NRP1 - 1
489                JREAL = JREAL + 1
490                JIMAG = JIMAG + 1
491                BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
492      $                            RWORK( JIMAG ) )
493   300       CONTINUE
494   310    CONTINUE
495 *
496   320 CONTINUE
497 *
498   330 CONTINUE
499 *
500       RETURN
501 *
502 *     End of ZLALSA
503 *
504       END