zlalsa.f (flens/lapack/interface/ref

  1       SUBROUTINE ZLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,

  2      $                   LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,

  3      $                   GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK,

  4      $                   IWORK, INFO )

  5 *

  6 *  -- LAPACK routine (version 3.2) --

  7 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --

  8 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--

  9 *     November 2006

 10 *

 11 *     .. Scalar Arguments ..

 12       INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,

 13      $                   SMLSIZ

 14 *     ..

 15 *     .. Array Arguments ..

 16       INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),

 17      $                   K( * ), PERM( LDGCOL, * )

 18       DOUBLE PRECISION   C( * ), DIFL( LDU, * ), DIFR( LDU, * ),

 19      $                   GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ),

 20      $                   S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * )

 21       COMPLEX*16         B( LDB, * ), BX( LDBX, * )

 22 *     ..

 23 *

 24 *  Purpose

 25 *  =======

 26 *

 27 *  ZLALSA is an itermediate step in solving the least squares problem

 28 *  by computing the SVD of the coefficient matrix in compact form (The

 29 *  singular vectors are computed as products of simple orthorgonal

 30 *  matrices.).

 31 *

 32 *  If ICOMPQ = 0, ZLALSA applies the inverse of the left singular vector

 33 *  matrix of an upper bidiagonal matrix to the right hand side; and if

 34 *  ICOMPQ = 1, ZLALSA applies the right singular vector matrix to the

 35 *  right hand side. The singular vector matrices were generated in

 36 *  compact form by ZLALSA.

 37 *

 38 *  Arguments

 39 *  =========

 40 *

 41 *  ICOMPQ (input) INTEGER

 42 *         Specifies whether the left or the right singular vector

 43 *         matrix is involved.

 44 *         = 0: Left singular vector matrix

 45 *         = 1: Right singular vector matrix

 46 *

 47 *  SMLSIZ (input) INTEGER

 48 *         The maximum size of the subproblems at the bottom of the

 49 *         computation tree.

 50 *

 51 *  N      (input) INTEGER

 52 *         The row and column dimensions of the upper bidiagonal matrix.

 53 *

 54 *  NRHS   (input) INTEGER

 55 *         The number of columns of B and BX. NRHS must be at least 1.

 56 *

 57 *  B      (input/output) COMPLEX*16 array, dimension ( LDB, NRHS )

 58 *         On input, B contains the right hand sides of the least

 59 *         squares problem in rows 1 through M.

 60 *         On output, B contains the solution X in rows 1 through N.

 61 *

 62 *  LDB    (input) INTEGER

 63 *         The leading dimension of B in the calling subprogram.

 64 *         LDB must be at least max(1,MAX( M, N ) ).

 65 *

 66 *  BX     (output) COMPLEX*16 array, dimension ( LDBX, NRHS )

 67 *         On exit, the result of applying the left or right singular

 68 *         vector matrix to B.

 69 *

 70 *  LDBX   (input) INTEGER

 71 *         The leading dimension of BX.

 72 *

 73 *  U      (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ).

 74 *         On entry, U contains the left singular vector matrices of all

 75 *         subproblems at the bottom level.

 76 *

 77 *  LDU    (input) INTEGER, LDU = > N.

 78 *         The leading dimension of arrays U, VT, DIFL, DIFR,

 79 *         POLES, GIVNUM, and Z.

 80 *

 81 *  VT     (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ).

 82 *         On entry, VT**H contains the right singular vector matrices of

 83 *         all subproblems at the bottom level.

 84 *

 85 *  K      (input) INTEGER array, dimension ( N ).

 86 *

 87 *  DIFL   (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).

 88 *         where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.

 89 *

 90 *  DIFR   (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).

 91 *         On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record

 92 *         distances between singular values on the I-th level and

 93 *         singular values on the (I -1)-th level, and DIFR(*, 2 * I)

 94 *         record the normalizing factors of the right singular vectors

 95 *         matrices of subproblems on I-th level.

 96 *

 97 *  Z      (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).

 98 *         On entry, Z(1, I) contains the components of the deflation-

 99 *         adjusted updating row vector for subproblems on the I-th

100 *         level.

101 *

102 *  POLES  (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).

103 *         On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old

104 *         singular values involved in the secular equations on the I-th

105 *         level.

106 *

107 *  GIVPTR (input) INTEGER array, dimension ( N ).

108 *         On entry, GIVPTR( I ) records the number of Givens

109 *         rotations performed on the I-th problem on the computation

110 *         tree.

111 *

112 *  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).

113 *         On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the

114 *         locations of Givens rotations performed on the I-th level on

115 *         the computation tree.

116 *

117 *  LDGCOL (input) INTEGER, LDGCOL = > N.

118 *         The leading dimension of arrays GIVCOL and PERM.

119 *

120 *  PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).

121 *         On entry, PERM(*, I) records permutations done on the I-th

122 *         level of the computation tree.

123 *

124 *  GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).

125 *         On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-

126 *         values of Givens rotations performed on the I-th level on the

127 *         computation tree.

128 *

129 *  C      (input) DOUBLE PRECISION array, dimension ( N ).

130 *         On entry, if the I-th subproblem is not square,

131 *         C( I ) contains the C-value of a Givens rotation related to

132 *         the right null space of the I-th subproblem.

133 *

134 *  S      (input) DOUBLE PRECISION array, dimension ( N ).

135 *         On entry, if the I-th subproblem is not square,

136 *         S( I ) contains the S-value of a Givens rotation related to

137 *         the right null space of the I-th subproblem.

138 *

139 *  RWORK  (workspace) DOUBLE PRECISION array, dimension at least

140 *         MAX( (SMLSZ+1)*NRHS*3, N*(1+NRHS) + 2*NRHS ).

141 *

142 *  IWORK  (workspace) INTEGER array.

143 *         The dimension must be at least 3 * N

144 *

145 *  INFO   (output) INTEGER

146 *          = 0:  successful exit.

147 *          < 0:  if INFO = -i, the i-th argument had an illegal value.

148 *

149 *  Further Details

150 *  ===============

151 *

152 *  Based on contributions by

153 *     Ming Gu and Ren-Cang Li, Computer Science Division, University of

154 *       California at Berkeley, USA

155 *     Osni Marques, LBNL/NERSC, USA

156 *

157 *  =====================================================================

158 *

159 *     .. Parameters ..

160       DOUBLE PRECISION   ZERO, ONE

161       PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )

162 *     ..

163 *     .. Local Scalars ..

164       INTEGER            I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL,

165      $                   JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML,

166      $                   NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE

167 *     ..

168 *     .. External Subroutines ..

169       EXTERNAL           DGEMM, DLASDT, XERBLA, ZCOPY, ZLALS0

170 *     ..

171 *     .. Intrinsic Functions ..

172       INTRINSIC          DBLE, DCMPLX, DIMAG

173 *     ..

174 *     .. Executable Statements ..

175 *

176 *     Test the input parameters.

177 *

178       INFO = 0

179 *

180       IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN

181          INFO = -1

182       ELSE IF( SMLSIZ.LT.3 ) THEN

183          INFO = -2

184       ELSE IF( N.LT.SMLSIZ ) THEN

185          INFO = -3

186       ELSE IF( NRHS.LT.1 ) THEN

187          INFO = -4

188       ELSE IF( LDB.LT.N ) THEN

189          INFO = -6

190       ELSE IF( LDBX.LT.N ) THEN

191          INFO = -8

192       ELSE IF( LDU.LT.N ) THEN

193          INFO = -10

194       ELSE IF( LDGCOL.LT.N ) THEN

195          INFO = -19

196       END IF

197       IF( INFO.NE.0 ) THEN

198          CALL XERBLA( 'ZLALSA', -INFO )

199          RETURN

200       END IF

201 *

202 *     Book-keeping and  setting up the computation tree.

203 *

204       INODE = 1

205       NDIML = INODE + N

206       NDIMR = NDIML + N

207 *

208       CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),

209      $             IWORK( NDIMR ), SMLSIZ )

210 *

211 *     The following code applies back the left singular vector factors.

212 *     For applying back the right singular vector factors, go to 170.

213 *

214       IF( ICOMPQ.EQ.1 ) THEN

215          GO TO 170

216       END IF

217 *

218 *     The nodes on the bottom level of the tree were solved

219 *     by DLASDQ. The corresponding left and right singular vector

220 *     matrices are in explicit form. First apply back the left

221 *     singular vector matrices.

222 *

223       NDB1 = ( ND+1 ) / 2

224       DO 130 I = NDB1, ND

225 *

226 *        IC : center row of each node

227 *        NL : number of rows of left  subproblem

228 *        NR : number of rows of right subproblem

229 *        NLF: starting row of the left   subproblem

230 *        NRF: starting row of the right  subproblem

231 *

232          I1 = I - 1

233          IC = IWORK( INODE+I1 )

234          NL = IWORK( NDIML+I1 )

235          NR = IWORK( NDIMR+I1 )

236          NLF = IC - NL

237          NRF = IC + 1

238 *

239 *        Since B and BX are complex, the following call to DGEMM

240 *        is performed in two steps (real and imaginary parts).

241 *

242 *        CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,

243 *     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )

244 *

245          J = NL*NRHS*2

246          DO 20 JCOL = 1, NRHS

247             DO 10 JROW = NLF, NLF + NL - 1

248                J = J + 1

249                RWORK( J ) = DBLE( B( JROW, JCOL ) )

250    10       CONTINUE

251    20    CONTINUE

252          CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,

253      $               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL )

254          J = NL*NRHS*2

255          DO 40 JCOL = 1, NRHS

256             DO 30 JROW = NLF, NLF + NL - 1

257                J = J + 1

258                RWORK( J ) = DIMAG( B( JROW, JCOL ) )

259    30       CONTINUE

260    40    CONTINUE

261          CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,

262      $               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ),

263      $               NL )

264          JREAL = 0

265          JIMAG = NL*NRHS

266          DO 60 JCOL = 1, NRHS

267             DO 50 JROW = NLF, NLF + NL - 1

268                JREAL = JREAL + 1

269                JIMAG = JIMAG + 1

270                BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),

271      $                            RWORK( JIMAG ) )

272    50       CONTINUE

273    60    CONTINUE

274 *

275 *        Since B and BX are complex, the following call to DGEMM

276 *        is performed in two steps (real and imaginary parts).

277 *

278 *        CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,

279 *    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )

280 *

281          J = NR*NRHS*2

282          DO 80 JCOL = 1, NRHS

283             DO 70 JROW = NRF, NRF + NR - 1

284                J = J + 1

285                RWORK( J ) = DBLE( B( JROW, JCOL ) )

286    70       CONTINUE

287    80    CONTINUE

288          CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,

289      $               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR )

290          J = NR*NRHS*2

291          DO 100 JCOL = 1, NRHS

292             DO 90 JROW = NRF, NRF + NR - 1

293                J = J + 1

294                RWORK( J ) = DIMAG( B( JROW, JCOL ) )

295    90       CONTINUE

296   100    CONTINUE

297          CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,

298      $               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ),

299      $               NR )

300          JREAL = 0

301          JIMAG = NR*NRHS

302          DO 120 JCOL = 1, NRHS

303             DO 110 JROW = NRF, NRF + NR - 1

304                JREAL = JREAL + 1

305                JIMAG = JIMAG + 1

306                BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),

307      $                            RWORK( JIMAG ) )

308   110       CONTINUE

309   120    CONTINUE

310 *

311   130 CONTINUE

312 *

313 *     Next copy the rows of B that correspond to unchanged rows

314 *     in the bidiagonal matrix to BX.

315 *

316       DO 140 I = 1, ND

317          IC = IWORK( INODE+I-1 )

318          CALL ZCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )

319   140 CONTINUE

320 *

321 *     Finally go through the left singular vector matrices of all

322 *     the other subproblems bottom-up on the tree.

323 *

324       J = 2**NLVL

325       SQRE = 0

326 *

327       DO 160 LVL = NLVL, 1, -1

328          LVL2 = 2*LVL - 1

329 *

330 *        find the first node LF and last node LL on

331 *        the current level LVL

332 *

333          IF( LVL.EQ.1 ) THEN

334             LF = 1

335             LL = 1

336          ELSE

337             LF = 2**( LVL-1 )

338             LL = 2*LF - 1

339          END IF

340          DO 150 I = LF, LL

341             IM1 = I - 1

342             IC = IWORK( INODE+IM1 )

343             NL = IWORK( NDIML+IM1 )

344             NR = IWORK( NDIMR+IM1 )

345             NLF = IC - NL

346             NRF = IC + 1

347             J = J - 1

348             CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,

349      $                   B( NLF, 1 ), LDB, PERM( NLF, LVL ),

350      $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,

351      $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),

352      $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),

353      $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,

354      $                   INFO )

355   150    CONTINUE

356   160 CONTINUE

357       GO TO 330

358 *

359 *     ICOMPQ = 1: applying back the right singular vector factors.

360 *

361   170 CONTINUE

362 *

363 *     First now go through the right singular vector matrices of all

364 *     the tree nodes top-down.

365 *

366       J = 0

367       DO 190 LVL = 1, NLVL

368          LVL2 = 2*LVL - 1

369 *

370 *        Find the first node LF and last node LL on

371 *        the current level LVL.

372 *

373          IF( LVL.EQ.1 ) THEN

374             LF = 1

375             LL = 1

376          ELSE

377             LF = 2**( LVL-1 )

378             LL = 2*LF - 1

379          END IF

380          DO 180 I = LL, LF, -1

381             IM1 = I - 1

382             IC = IWORK( INODE+IM1 )

383             NL = IWORK( NDIML+IM1 )

384             NR = IWORK( NDIMR+IM1 )

385             NLF = IC - NL

386             NRF = IC + 1

387             IF( I.EQ.LL ) THEN

388                SQRE = 0

389             ELSE

390                SQRE = 1

391             END IF

392             J = J + 1

393             CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,

394      $                   BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),

395      $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,

396      $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),

397      $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),

398      $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,

399      $                   INFO )

400   180    CONTINUE

401   190 CONTINUE

402 *

403 *     The nodes on the bottom level of the tree were solved

404 *     by DLASDQ. The corresponding right singular vector

405 *     matrices are in explicit form. Apply them back.

406 *

407       NDB1 = ( ND+1 ) / 2

408       DO 320 I = NDB1, ND

409          I1 = I - 1

410          IC = IWORK( INODE+I1 )

411          NL = IWORK( NDIML+I1 )

412          NR = IWORK( NDIMR+I1 )

413          NLP1 = NL + 1

414          IF( I.EQ.ND ) THEN

415             NRP1 = NR

416          ELSE

417             NRP1 = NR + 1

418          END IF

419          NLF = IC - NL

420          NRF = IC + 1

421 *

422 *        Since B and BX are complex, the following call to DGEMM is

423 *        performed in two steps (real and imaginary parts).

424 *

425 *        CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,

426 *    $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )

427 *

428          J = NLP1*NRHS*2

429          DO 210 JCOL = 1, NRHS

430             DO 200 JROW = NLF, NLF + NLP1 - 1

431                J = J + 1

432                RWORK( J ) = DBLE( B( JROW, JCOL ) )

433   200       CONTINUE

434   210    CONTINUE

435          CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,

436      $               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ),

437      $               NLP1 )

438          J = NLP1*NRHS*2

439          DO 230 JCOL = 1, NRHS

440             DO 220 JROW = NLF, NLF + NLP1 - 1

441                J = J + 1

442                RWORK( J ) = DIMAG( B( JROW, JCOL ) )

443   220       CONTINUE

444   230    CONTINUE

445          CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,

446      $               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO,

447      $               RWORK( 1+NLP1*NRHS ), NLP1 )

448          JREAL = 0

449          JIMAG = NLP1*NRHS

450          DO 250 JCOL = 1, NRHS

451             DO 240 JROW = NLF, NLF + NLP1 - 1

452                JREAL = JREAL + 1

453                JIMAG = JIMAG + 1

454                BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),

455      $                            RWORK( JIMAG ) )

456   240       CONTINUE

457   250    CONTINUE

458 *

459 *        Since B and BX are complex, the following call to DGEMM is

460 *        performed in two steps (real and imaginary parts).

461 *

462 *        CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,

463 *    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )

464 *

465          J = NRP1*NRHS*2

466          DO 270 JCOL = 1, NRHS

467             DO 260 JROW = NRF, NRF + NRP1 - 1

468                J = J + 1

469                RWORK( J ) = DBLE( B( JROW, JCOL ) )

470   260       CONTINUE

471   270    CONTINUE

472          CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,

473      $               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ),

474      $               NRP1 )

475          J = NRP1*NRHS*2

476          DO 290 JCOL = 1, NRHS

477             DO 280 JROW = NRF, NRF + NRP1 - 1

478                J = J + 1

479                RWORK( J ) = DIMAG( B( JROW, JCOL ) )

480   280       CONTINUE

481   290    CONTINUE

482          CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,

483      $               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO,

484      $               RWORK( 1+NRP1*NRHS ), NRP1 )

485          JREAL = 0

486          JIMAG = NRP1*NRHS

487          DO 310 JCOL = 1, NRHS

488             DO 300 JROW = NRF, NRF + NRP1 - 1

489                JREAL = JREAL + 1

490                JIMAG = JIMAG + 1

491                BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),

492      $                            RWORK( JIMAG ) )

493   300       CONTINUE

494   310    CONTINUE

495 *

496   320 CONTINUE

497 *

498   330 CONTINUE

499 *

500       RETURN

501 *

502 *     End of ZLALSA

503 *

504       END