1       SUBROUTINE DLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
  2      $                   LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
  3      $                   GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK,
  4      $                   IWORK, INFO )
  5 *
  6 *  -- LAPACK routine (version 3.2) --
  7 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  8 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  9 *     November 2006
 10 *
 11 *     .. Scalar Arguments ..
 12       INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
 13      $                   SMLSIZ
 14 *     ..
 15 *     .. Array Arguments ..
 16       INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
 17      $                   K( * ), PERM( LDGCOL, * )
 18       DOUBLE PRECISION   B( LDB, * ), BX( LDBX, * ), C( * ),
 19      $                   DIFL( LDU, * ), DIFR( LDU, * ),
 20      $                   GIVNUM( LDU, * ), POLES( LDU, * ), S( * ),
 21      $                   U( LDU, * ), VT( LDU, * ), WORK( * ),
 22      $                   Z( LDU, * )
 23 *     ..
 24 *
 25 *  Purpose
 26 *  =======
 27 *
 28 *  DLALSA is an itermediate step in solving the least squares problem
 29 *  by computing the SVD of the coefficient matrix in compact form (The
 30 *  singular vectors are computed as products of simple orthorgonal
 31 *  matrices.).
 32 *
 33 *  If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector
 34 *  matrix of an upper bidiagonal matrix to the right hand side; and if
 35 *  ICOMPQ = 1, DLALSA applies the right singular vector matrix to the
 36 *  right hand side. The singular vector matrices were generated in
 37 *  compact form by DLALSA.
 38 *
 39 *  Arguments
 40 *  =========
 41 *
 42 *
 43 *  ICOMPQ (input) INTEGER
 44 *         Specifies whether the left or the right singular vector
 45 *         matrix is involved.
 46 *         = 0: Left singular vector matrix
 47 *         = 1: Right singular vector matrix
 48 *
 49 *  SMLSIZ (input) INTEGER
 50 *         The maximum size of the subproblems at the bottom of the
 51 *         computation tree.
 52 *
 53 *  N      (input) INTEGER
 54 *         The row and column dimensions of the upper bidiagonal matrix.
 55 *
 56 *  NRHS   (input) INTEGER
 57 *         The number of columns of B and BX. NRHS must be at least 1.
 58 *
 59 *  B      (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS )
 60 *         On input, B contains the right hand sides of the least
 61 *         squares problem in rows 1 through M.
 62 *         On output, B contains the solution X in rows 1 through N.
 63 *
 64 *  LDB    (input) INTEGER
 65 *         The leading dimension of B in the calling subprogram.
 66 *         LDB must be at least max(1,MAX( M, N ) ).
 67 *
 68 *  BX     (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS )
 69 *         On exit, the result of applying the left or right singular
 70 *         vector matrix to B.
 71 *
 72 *  LDBX   (input) INTEGER
 73 *         The leading dimension of BX.
 74 *
 75 *  U      (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ).
 76 *         On entry, U contains the left singular vector matrices of all
 77 *         subproblems at the bottom level.
 78 *
 79 *  LDU    (input) INTEGER, LDU = > N.
 80 *         The leading dimension of arrays U, VT, DIFL, DIFR,
 81 *         POLES, GIVNUM, and Z.
 82 *
 83 *  VT     (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ).
 84 *         On entry, VT**T contains the right singular vector matrices of
 85 *         all subproblems at the bottom level.
 86 *
 87 *  K      (input) INTEGER array, dimension ( N ).
 88 *
 89 *  DIFL   (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
 90 *         where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
 91 *
 92 *  DIFR   (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
 93 *         On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
 94 *         distances between singular values on the I-th level and
 95 *         singular values on the (I -1)-th level, and DIFR(*, 2 * I)
 96 *         record the normalizing factors of the right singular vectors
 97 *         matrices of subproblems on I-th level.
 98 *
 99 *  Z      (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
100 *         On entry, Z(1, I) contains the components of the deflation-
101 *         adjusted updating row vector for subproblems on the I-th
102 *         level.
103 *
104 *  POLES  (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
105 *         On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
106 *         singular values involved in the secular equations on the I-th
107 *         level.
108 *
109 *  GIVPTR (input) INTEGER array, dimension ( N ).
110 *         On entry, GIVPTR( I ) records the number of Givens
111 *         rotations performed on the I-th problem on the computation
112 *         tree.
113 *
114 *  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
115 *         On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
116 *         locations of Givens rotations performed on the I-th level on
117 *         the computation tree.
118 *
119 *  LDGCOL (input) INTEGER, LDGCOL = > N.
120 *         The leading dimension of arrays GIVCOL and PERM.
121 *
122 *  PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).
123 *         On entry, PERM(*, I) records permutations done on the I-th
124 *         level of the computation tree.
125 *
126 *  GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
127 *         On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
128 *         values of Givens rotations performed on the I-th level on the
129 *         computation tree.
130 *
131 *  C      (input) DOUBLE PRECISION array, dimension ( N ).
132 *         On entry, if the I-th subproblem is not square,
133 *         C( I ) contains the C-value of a Givens rotation related to
134 *         the right null space of the I-th subproblem.
135 *
136 *  S      (input) DOUBLE PRECISION array, dimension ( N ).
137 *         On entry, if the I-th subproblem is not square,
138 *         S( I ) contains the S-value of a Givens rotation related to
139 *         the right null space of the I-th subproblem.
140 *
141 *  WORK   (workspace) DOUBLE PRECISION array.
142 *         The dimension must be at least N.
143 *
144 *  IWORK  (workspace) INTEGER array.
145 *         The dimension must be at least 3 * N
146 *
147 *  INFO   (output) INTEGER
148 *          = 0:  successful exit.
149 *          < 0:  if INFO = -i, the i-th argument had an illegal value.
150 *
151 *  Further Details
152 *  ===============
153 *
154 *  Based on contributions by
155 *     Ming Gu and Ren-Cang Li, Computer Science Division, University of
156 *       California at Berkeley, USA
157 *     Osni Marques, LBNL/NERSC, USA
158 *
159 *  =====================================================================
160 *
161 *     .. Parameters ..
162       DOUBLE PRECISION   ZERO, ONE
163       PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
164 *     ..
165 *     .. Local Scalars ..
166       INTEGER            I, I1, IC, IM1, INODE, J, LF, LL, LVL, LVL2,
167      $                   ND, NDB1, NDIML, NDIMR, NL, NLF, NLP1, NLVL,
168      $                   NR, NRF, NRP1, SQRE
169 *     ..
170 *     .. External Subroutines ..
171       EXTERNAL           DCOPY, DGEMM, DLALS0, DLASDT, XERBLA
172 *     ..
173 *     .. Executable Statements ..
174 *
175 *     Test the input parameters.
176 *
177       INFO = 0
178 *
179       IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
180          INFO = -1
181       ELSE IF( SMLSIZ.LT.3 ) THEN
182          INFO = -2
183       ELSE IF( N.LT.SMLSIZ ) THEN
184          INFO = -3
185       ELSE IF( NRHS.LT.1 ) THEN
186          INFO = -4
187       ELSE IF( LDB.LT.N ) THEN
188          INFO = -6
189       ELSE IF( LDBX.LT.N ) THEN
190          INFO = -8
191       ELSE IF( LDU.LT.N ) THEN
192          INFO = -10
193       ELSE IF( LDGCOL.LT.N ) THEN
194          INFO = -19
195       END IF
196       IF( INFO.NE.0 ) THEN
197          CALL XERBLA( 'DLALSA'-INFO )
198          RETURN
199       END IF
200 *
201 *     Book-keeping and  setting up the computation tree.
202 *
203       INODE = 1
204       NDIML = INODE + N
205       NDIMR = NDIML + N
206 *
207       CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
208      $             IWORK( NDIMR ), SMLSIZ )
209 *
210 *     The following code applies back the left singular vector factors.
211 *     For applying back the right singular vector factors, go to 50.
212 *
213       IF( ICOMPQ.EQ.1 ) THEN
214          GO TO 50
215       END IF
216 *
217 *     The nodes on the bottom level of the tree were solved
218 *     by DLASDQ. The corresponding left and right singular vector
219 *     matrices are in explicit form. First apply back the left
220 *     singular vector matrices.
221 *
222       NDB1 = ( ND+1 ) / 2
223       DO 10 I = NDB1, ND
224 *
225 *        IC : center row of each node
226 *        NL : number of rows of left  subproblem
227 *        NR : number of rows of right subproblem
228 *        NLF: starting row of the left   subproblem
229 *        NRF: starting row of the right  subproblem
230 *
231          I1 = I - 1
232          IC = IWORK( INODE+I1 )
233          NL = IWORK( NDIML+I1 )
234          NR = IWORK( NDIMR+I1 )
235          NLF = IC - NL
236          NRF = IC + 1
237          CALL DGEMM( 'T''N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
238      $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
239          CALL DGEMM( 'T''N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
240      $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
241    10 CONTINUE
242 *
243 *     Next copy the rows of B that correspond to unchanged rows
244 *     in the bidiagonal matrix to BX.
245 *
246       DO 20 I = 1, ND
247          IC = IWORK( INODE+I-1 )
248          CALL DCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
249    20 CONTINUE
250 *
251 *     Finally go through the left singular vector matrices of all
252 *     the other subproblems bottom-up on the tree.
253 *
254       J = 2**NLVL
255       SQRE = 0
256 *
257       DO 40 LVL = NLVL, 1-1
258          LVL2 = 2*LVL - 1
259 *
260 *        find the first node LF and last node LL on
261 *        the current level LVL
262 *
263          IF( LVL.EQ.1 ) THEN
264             LF = 1
265             LL = 1
266          ELSE
267             LF = 2**( LVL-1 )
268             LL = 2*LF - 1
269          END IF
270          DO 30 I = LF, LL
271             IM1 = I - 1
272             IC = IWORK( INODE+IM1 )
273             NL = IWORK( NDIML+IM1 )
274             NR = IWORK( NDIMR+IM1 )
275             NLF = IC - NL
276             NRF = IC + 1
277             J = J - 1
278             CALL DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
279      $                   B( NLF, 1 ), LDB, PERM( NLF, LVL ),
280      $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
281      $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
282      $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
283      $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK,
284      $                   INFO )
285    30    CONTINUE
286    40 CONTINUE
287       GO TO 90
288 *
289 *     ICOMPQ = 1: applying back the right singular vector factors.
290 *
291    50 CONTINUE
292 *
293 *     First now go through the right singular vector matrices of all
294 *     the tree nodes top-down.
295 *
296       J = 0
297       DO 70 LVL = 1, NLVL
298          LVL2 = 2*LVL - 1
299 *
300 *        Find the first node LF and last node LL on
301 *        the current level LVL.
302 *
303          IF( LVL.EQ.1 ) THEN
304             LF = 1
305             LL = 1
306          ELSE
307             LF = 2**( LVL-1 )
308             LL = 2*LF - 1
309          END IF
310          DO 60 I = LL, LF, -1
311             IM1 = I - 1
312             IC = IWORK( INODE+IM1 )
313             NL = IWORK( NDIML+IM1 )
314             NR = IWORK( NDIMR+IM1 )
315             NLF = IC - NL
316             NRF = IC + 1
317             IF( I.EQ.LL ) THEN
318                SQRE = 0
319             ELSE
320                SQRE = 1
321             END IF
322             J = J + 1
323             CALL DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
324      $                   BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
325      $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
326      $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
327      $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
328      $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK,
329      $                   INFO )
330    60    CONTINUE
331    70 CONTINUE
332 *
333 *     The nodes on the bottom level of the tree were solved
334 *     by DLASDQ. The corresponding right singular vector
335 *     matrices are in explicit form. Apply them back.
336 *
337       NDB1 = ( ND+1 ) / 2
338       DO 80 I = NDB1, ND
339          I1 = I - 1
340          IC = IWORK( INODE+I1 )
341          NL = IWORK( NDIML+I1 )
342          NR = IWORK( NDIMR+I1 )
343          NLP1 = NL + 1
344          IF( I.EQ.ND ) THEN
345             NRP1 = NR
346          ELSE
347             NRP1 = NR + 1
348          END IF
349          NLF = IC - NL
350          NRF = IC + 1
351          CALL DGEMM( 'T''N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
352      $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
353          CALL DGEMM( 'T''N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
354      $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
355    80 CONTINUE
356 *
357    90 CONTINUE
358 *
359       RETURN
360 *
361 *     End of DLALSA
362 *
363       END