dlasd2.f (flens/lapack/interface/ref

  1       SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,

  2      $                   LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,

  3      $                   IDXC, IDXQ, COLTYP, INFO )

  4 *

  5 *  -- LAPACK auxiliary routine (version 3.2) --

  6 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --

  7 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--

  8 *     November 2006

  9 *

 10 *     .. Scalar Arguments ..

 11       INTEGER            INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE

 12       DOUBLE PRECISION   ALPHA, BETA

 13 *     ..

 14 *     .. Array Arguments ..

 15       INTEGER            COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),

 16      $                   IDXQ( * )

 17       DOUBLE PRECISION   D( * ), DSIGMA( * ), U( LDU, * ),

 18      $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),

 19      $                   Z( * )

 20 *     ..

 21 *

 22 *  Purpose

 23 *  =======

 24 *

 25 *  DLASD2 merges the two sets of singular values together into a single

 26 *  sorted set.  Then it tries to deflate the size of the problem.

 27 *  There are two ways in which deflation can occur:  when two or more

 28 *  singular values are close together or if there is a tiny entry in the

 29 *  Z vector.  For each such occurrence the order of the related secular

 30 *  equation problem is reduced by one.

 31 *

 32 *  DLASD2 is called from DLASD1.

 33 *

 34 *  Arguments

 35 *  =========

 36 *

 37 *  NL     (input) INTEGER

 38 *         The row dimension of the upper block.  NL >= 1.

 39 *

 40 *  NR     (input) INTEGER

 41 *         The row dimension of the lower block.  NR >= 1.

 42 *

 43 *  SQRE   (input) INTEGER

 44 *         = 0: the lower block is an NR-by-NR square matrix.

 45 *         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.

 46 *

 47 *         The bidiagonal matrix has N = NL + NR + 1 rows and

 48 *         M = N + SQRE >= N columns.

 49 *

 50 *  K      (output) INTEGER

 51 *         Contains the dimension of the non-deflated matrix,

 52 *         This is the order of the related secular equation. 1 <= K <=N.

 53 *

 54 *  D      (input/output) DOUBLE PRECISION array, dimension(N)

 55 *         On entry D contains the singular values of the two submatrices

 56 *         to be combined.  On exit D contains the trailing (N-K) updated

 57 *         singular values (those which were deflated) sorted into

 58 *         increasing order.

 59 *

 60 *  Z      (output) DOUBLE PRECISION array, dimension(N)

 61 *         On exit Z contains the updating row vector in the secular

 62 *         equation.

 63 *

 64 *  ALPHA  (input) DOUBLE PRECISION

 65 *         Contains the diagonal element associated with the added row.

 66 *

 67 *  BETA   (input) DOUBLE PRECISION

 68 *         Contains the off-diagonal element associated with the added

 69 *         row.

 70 *

 71 *  U      (input/output) DOUBLE PRECISION array, dimension(LDU,N)

 72 *         On entry U contains the left singular vectors of two

 73 *         submatrices in the two square blocks with corners at (1,1),

 74 *         (NL, NL), and (NL+2, NL+2), (N,N).

 75 *         On exit U contains the trailing (N-K) updated left singular

 76 *         vectors (those which were deflated) in its last N-K columns.

 77 *

 78 *  LDU    (input) INTEGER

 79 *         The leading dimension of the array U.  LDU >= N.

 80 *

 81 *  VT     (input/output) DOUBLE PRECISION array, dimension(LDVT,M)

 82 *         On entry VT**T contains the right singular vectors of two

 83 *         submatrices in the two square blocks with corners at (1,1),

 84 *         (NL+1, NL+1), and (NL+2, NL+2), (M,M).

 85 *         On exit VT**T contains the trailing (N-K) updated right singular

 86 *         vectors (those which were deflated) in its last N-K columns.

 87 *         In case SQRE =1, the last row of VT spans the right null

 88 *         space.

 89 *

 90 *  LDVT   (input) INTEGER

 91 *         The leading dimension of the array VT.  LDVT >= M.

 92 *

 93 *  DSIGMA (output) DOUBLE PRECISION array, dimension (N)

 94 *         Contains a copy of the diagonal elements (K-1 singular values

 95 *         and one zero) in the secular equation.

 96 *

 97 *  U2     (output) DOUBLE PRECISION array, dimension(LDU2,N)

 98 *         Contains a copy of the first K-1 left singular vectors which

 99 *         will be used by DLASD3 in a matrix multiply (DGEMM) to solve

100 *         for the new left singular vectors. U2 is arranged into four

101 *         blocks. The first block contains a column with 1 at NL+1 and

102 *         zero everywhere else; the second block contains non-zero

103 *         entries only at and above NL; the third contains non-zero

104 *         entries only below NL+1; and the fourth is dense.

105 *

106 *  LDU2   (input) INTEGER

107 *         The leading dimension of the array U2.  LDU2 >= N.

108 *

109 *  VT2    (output) DOUBLE PRECISION array, dimension(LDVT2,N)

110 *         VT2**T contains a copy of the first K right singular vectors

111 *         which will be used by DLASD3 in a matrix multiply (DGEMM) to

112 *         solve for the new right singular vectors. VT2 is arranged into

113 *         three blocks. The first block contains a row that corresponds

114 *         to the special 0 diagonal element in SIGMA; the second block

115 *         contains non-zeros only at and before NL +1; the third block

116 *         contains non-zeros only at and after  NL +2.

117 *

118 *  LDVT2  (input) INTEGER

119 *         The leading dimension of the array VT2.  LDVT2 >= M.

120 *

121 *  IDXP   (workspace) INTEGER array dimension(N)

122 *         This will contain the permutation used to place deflated

123 *         values of D at the end of the array. On output IDXP(2:K)

124 *         points to the nondeflated D-values and IDXP(K+1:N)

125 *         points to the deflated singular values.

126 *

127 *  IDX    (workspace) INTEGER array dimension(N)

128 *         This will contain the permutation used to sort the contents of

129 *         D into ascending order.

130 *

131 *  IDXC   (output) INTEGER array dimension(N)

132 *         This will contain the permutation used to arrange the columns

133 *         of the deflated U matrix into three groups:  the first group

134 *         contains non-zero entries only at and above NL, the second

135 *         contains non-zero entries only below NL+2, and the third is

136 *         dense.

137 *

138 *  IDXQ   (input/output) INTEGER array dimension(N)

139 *         This contains the permutation which separately sorts the two

140 *         sub-problems in D into ascending order.  Note that entries in

141 *         the first hlaf of this permutation must first be moved one

142 *         position backward; and entries in the second half

143 *         must first have NL+1 added to their values.

144 *

145 *  COLTYP (workspace/output) INTEGER array dimension(N)

146 *         As workspace, this will contain a label which will indicate

147 *         which of the following types a column in the U2 matrix or a

148 *         row in the VT2 matrix is:

149 *         1 : non-zero in the upper half only

150 *         2 : non-zero in the lower half only

151 *         3 : dense

152 *         4 : deflated

153 *

154 *         On exit, it is an array of dimension 4, with COLTYP(I) being

155 *         the dimension of the I-th type columns.

156 *

157 *  INFO   (output) INTEGER

158 *          = 0:  successful exit.

159 *          < 0:  if INFO = -i, the i-th argument had an illegal value.

160 *

161 *  Further Details

162 *  ===============

163 *

164 *  Based on contributions by

165 *     Ming Gu and Huan Ren, Computer Science Division, University of

166 *     California at Berkeley, USA

167 *

168 *  =====================================================================

169 *

170 *     .. Parameters ..

171       DOUBLE PRECISION   ZERO, ONE, TWO, EIGHT

172       PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,

173      $                   EIGHT = 8.0D+0 )

174 *     ..

175 *     .. Local Arrays ..

176       INTEGER            CTOT( 4 ), PSM( 4 )

177 *     ..

178 *     .. Local Scalars ..

179       INTEGER            CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,

180      $                   N, NLP1, NLP2

181       DOUBLE PRECISION   C, EPS, HLFTOL, S, TAU, TOL, Z1

182 *     ..

183 *     .. External Functions ..

184       DOUBLE PRECISION   DLAMCH, DLAPY2

185       EXTERNAL           DLAMCH, DLAPY2

186 *     ..

187 *     .. External Subroutines ..

188       EXTERNAL           DCOPY, DLACPY, DLAMRG, DLASET, DROT, XERBLA

189 *     ..

190 *     .. Intrinsic Functions ..

191       INTRINSIC          ABS, MAX

192 *     ..

193 *     .. Executable Statements ..

194 *

195 *     Test the input parameters.

196 *

197       INFO = 0

198 *

199       IF( NL.LT.1 ) THEN

200          INFO = -1

201       ELSE IF( NR.LT.1 ) THEN

202          INFO = -2

203       ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN

204          INFO = -3

205       END IF

206 *

207       N = NL + NR + 1

208       M = N + SQRE

209 *

210       IF( LDU.LT.N ) THEN

211          INFO = -10

212       ELSE IF( LDVT.LT.M ) THEN

213          INFO = -12

214       ELSE IF( LDU2.LT.N ) THEN

215          INFO = -15

216       ELSE IF( LDVT2.LT.M ) THEN

217          INFO = -17

218       END IF

219       IF( INFO.NE.0 ) THEN

220          CALL XERBLA( 'DLASD2', -INFO )

221          RETURN

222       END IF

223 *

224       NLP1 = NL + 1

225       NLP2 = NL + 2

226 *

227 *     Generate the first part of the vector Z; and move the singular

228 *     values in the first part of D one position backward.

229 *

230       Z1 = ALPHA*VT( NLP1, NLP1 )

231       Z( 1 ) = Z1

232       DO 10 I = NL, 1, -1

233          Z( I+1 ) = ALPHA*VT( I, NLP1 )

234          D( I+1 ) = D( I )

235          IDXQ( I+1 ) = IDXQ( I ) + 1

236    10 CONTINUE

237 *

238 *     Generate the second part of the vector Z.

239 *

240       DO 20 I = NLP2, M

241          Z( I ) = BETA*VT( I, NLP2 )

242    20 CONTINUE

243 *

244 *     Initialize some reference arrays.

245 *

246       DO 30 I = 2, NLP1

247          COLTYP( I ) = 1

248    30 CONTINUE

249       DO 40 I = NLP2, N

250          COLTYP( I ) = 2

251    40 CONTINUE

252 *

253 *     Sort the singular values into increasing order

254 *

255       DO 50 I = NLP2, N

256          IDXQ( I ) = IDXQ( I ) + NLP1

257    50 CONTINUE

258 *

259 *     DSIGMA, IDXC, IDXC, and the first column of U2

260 *     are used as storage space.

261 *

262       DO 60 I = 2, N

263          DSIGMA( I ) = D( IDXQ( I ) )

264          U2( I, 1 ) = Z( IDXQ( I ) )

265          IDXC( I ) = COLTYP( IDXQ( I ) )

266    60 CONTINUE

267 *

268       CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )

269 *

270       DO 70 I = 2, N

271          IDXI = 1 + IDX( I )

272          D( I ) = DSIGMA( IDXI )

273          Z( I ) = U2( IDXI, 1 )

274          COLTYP( I ) = IDXC( IDXI )

275    70 CONTINUE

276 *

277 *     Calculate the allowable deflation tolerance

278 *

279       EPS = DLAMCH( 'Epsilon' )

280       TOL = MAX( ABS( ALPHA ), ABS( BETA ) )

281       TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )

282 *

283 *     There are 2 kinds of deflation -- first a value in the z-vector

284 *     is small, second two (or more) singular values are very close

285 *     together (their difference is small).

286 *

287 *     If the value in the z-vector is small, we simply permute the

288 *     array so that the corresponding singular value is moved to the

289 *     end.

290 *

291 *     If two values in the D-vector are close, we perform a two-sided

292 *     rotation designed to make one of the corresponding z-vector

293 *     entries zero, and then permute the array so that the deflated

294 *     singular value is moved to the end.

295 *

296 *     If there are multiple singular values then the problem deflates.

297 *     Here the number of equal singular values are found.  As each equal

298 *     singular value is found, an elementary reflector is computed to

299 *     rotate the corresponding singular subspace so that the

300 *     corresponding components of Z are zero in this new basis.

301 *

302       K = 1

303       K2 = N + 1

304       DO 80 J = 2, N

305          IF( ABS( Z( J ) ).LE.TOL ) THEN

306 *

307 *           Deflate due to small z component.

308 *

309             K2 = K2 - 1

310             IDXP( K2 ) = J

311             COLTYP( J ) = 4

312             IF( J.EQ.N )

313      $         GO TO 120

314          ELSE

315             JPREV = J

316             GO TO 90

317          END IF

318    80 CONTINUE

319    90 CONTINUE

320       J = JPREV

321   100 CONTINUE

322       J = J + 1

323       IF( J.GT.N )

324      $   GO TO 110

325       IF( ABS( Z( J ) ).LE.TOL ) THEN

326 *

327 *        Deflate due to small z component.

328 *

329          K2 = K2 - 1

330          IDXP( K2 ) = J

331          COLTYP( J ) = 4

332       ELSE

333 *

334 *        Check if singular values are close enough to allow deflation.

335 *

336          IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN

337 *

338 *           Deflation is possible.

339 *

340             S = Z( JPREV )

341             C = Z( J )

342 *

343 *           Find sqrt(a**2+b**2) without overflow or

344 *           destructive underflow.

345 *

346             TAU = DLAPY2( C, S )

347             C = C / TAU

348             S = -S / TAU

349             Z( J ) = TAU

350             Z( JPREV ) = ZERO

351 *

352 *           Apply back the Givens rotation to the left and right

353 *           singular vector matrices.

354 *

355             IDXJP = IDXQ( IDX( JPREV )+1 )

356             IDXJ = IDXQ( IDX( J )+1 )

357             IF( IDXJP.LE.NLP1 ) THEN

358                IDXJP = IDXJP - 1

359             END IF

360             IF( IDXJ.LE.NLP1 ) THEN

361                IDXJ = IDXJ - 1

362             END IF

363             CALL DROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )

364             CALL DROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,

365      $                 S )

366             IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN

367                COLTYP( J ) = 3

368             END IF

369             COLTYP( JPREV ) = 4

370             K2 = K2 - 1

371             IDXP( K2 ) = JPREV

372             JPREV = J

373          ELSE

374             K = K + 1

375             U2( K, 1 ) = Z( JPREV )

376             DSIGMA( K ) = D( JPREV )

377             IDXP( K ) = JPREV

378             JPREV = J

379          END IF

380       END IF

381       GO TO 100

382   110 CONTINUE

383 *

384 *     Record the last singular value.

385 *

386       K = K + 1

387       U2( K, 1 ) = Z( JPREV )

388       DSIGMA( K ) = D( JPREV )

389       IDXP( K ) = JPREV

390 *

391   120 CONTINUE

392 *

393 *     Count up the total number of the various types of columns, then

394 *     form a permutation which positions the four column types into

395 *     four groups of uniform structure (although one or more of these

396 *     groups may be empty).

397 *

398       DO 130 J = 1, 4

399          CTOT( J ) = 0

400   130 CONTINUE

401       DO 140 J = 2, N

402          CT = COLTYP( J )

403          CTOT( CT ) = CTOT( CT ) + 1

404   140 CONTINUE

405 *

406 *     PSM(*) = Position in SubMatrix (of types 1 through 4)

407 *

408       PSM( 1 ) = 2

409       PSM( 2 ) = 2 + CTOT( 1 )

410       PSM( 3 ) = PSM( 2 ) + CTOT( 2 )

411       PSM( 4 ) = PSM( 3 ) + CTOT( 3 )

412 *

413 *     Fill out the IDXC array so that the permutation which it induces

414 *     will place all type-1 columns first, all type-2 columns next,

415 *     then all type-3's, and finally all type-4's, starting from the

416 *     second column. This applies similarly to the rows of VT.

417 *

418       DO 150 J = 2, N

419          JP = IDXP( J )

420          CT = COLTYP( JP )

421          IDXC( PSM( CT ) ) = J

422          PSM( CT ) = PSM( CT ) + 1

423   150 CONTINUE

424 *

425 *     Sort the singular values and corresponding singular vectors into

426 *     DSIGMA, U2, and VT2 respectively.  The singular values/vectors

427 *     which were not deflated go into the first K slots of DSIGMA, U2,

428 *     and VT2 respectively, while those which were deflated go into the

429 *     last N - K slots, except that the first column/row will be treated

430 *     separately.

431 *

432       DO 160 J = 2, N

433          JP = IDXP( J )

434          DSIGMA( J ) = D( JP )

435          IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )

436          IF( IDXJ.LE.NLP1 ) THEN

437             IDXJ = IDXJ - 1

438          END IF

439          CALL DCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )

440          CALL DCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )

441   160 CONTINUE

442 *

443 *     Determine DSIGMA(1), DSIGMA(2) and Z(1)

444 *

445       DSIGMA( 1 ) = ZERO

446       HLFTOL = TOL / TWO

447       IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )

448      $   DSIGMA( 2 ) = HLFTOL

449       IF( M.GT.N ) THEN

450          Z( 1 ) = DLAPY2( Z1, Z( M ) )

451          IF( Z( 1 ).LE.TOL ) THEN

452             C = ONE

453             S = ZERO

454             Z( 1 ) = TOL

455          ELSE

456             C = Z1 / Z( 1 )

457             S = Z( M ) / Z( 1 )

458          END IF

459       ELSE

460          IF( ABS( Z1 ).LE.TOL ) THEN

461             Z( 1 ) = TOL

462          ELSE

463             Z( 1 ) = Z1

464          END IF

465       END IF

466 *

467 *     Move the rest of the updating row to Z.

468 *

469       CALL DCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )

470 *

471 *     Determine the first column of U2, the first row of VT2 and the

472 *     last row of VT.

473 *

474       CALL DLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )

475       U2( NLP1, 1 ) = ONE

476       IF( M.GT.N ) THEN

477          DO 170 I = 1, NLP1

478             VT( M, I ) = -S*VT( NLP1, I )

479             VT2( 1, I ) = C*VT( NLP1, I )

480   170    CONTINUE

481          DO 180 I = NLP2, M

482             VT2( 1, I ) = S*VT( M, I )

483             VT( M, I ) = C*VT( M, I )

484   180    CONTINUE

485       ELSE

486          CALL DCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )

487       END IF

488       IF( M.GT.N ) THEN

489          CALL DCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )

490       END IF

491 *

492 *     The deflated singular values and their corresponding vectors go

493 *     into the back of D, U, and V respectively.

494 *

495       IF( N.GT.K ) THEN

496          CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )

497          CALL DLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),

498      $                LDU )

499          CALL DLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),

500      $                LDVT )

501       END IF

502 *

503 *     Copy CTOT into COLTYP for referencing in DLASD3.

504 *

505       DO 190 J = 1, 4

506          COLTYP( J ) = CTOT( J )

507   190 CONTINUE

508 *

509       RETURN

510 *

511 *     End of DLASD2

512 *

513       END