dlasd7.f (flens/lapack/interface/ref

  1       SUBROUTINE DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL,

  2      $                   VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ,

  3      $                   PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,

  4      $                   C, S, INFO )

  5 *

  6 *  -- LAPACK auxiliary routine (version 3.2) --

  7 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --

  8 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--

  9 *     November 2006

 10 *

 11 *     .. Scalar Arguments ..

 12       INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,

 13      $                   NR, SQRE

 14       DOUBLE PRECISION   ALPHA, BETA, C, S

 15 *     ..

 16 *     .. Array Arguments ..

 17       INTEGER            GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ),

 18      $                   IDXQ( * ), PERM( * )

 19       DOUBLE PRECISION   D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ),

 20      $                   VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ),

 21      $                   ZW( * )

 22 *     ..

 23 *

 24 *  Purpose

 25 *  =======

 26 *

 27 *  DLASD7 merges the two sets of singular values together into a single

 28 *  sorted set. Then it tries to deflate the size of the problem. There

 29 *  are two ways in which deflation can occur:  when two or more singular

 30 *  values are close together or if there is a tiny entry in the Z

 31 *  vector. For each such occurrence the order of the related

 32 *  secular equation problem is reduced by one.

 33 *

 34 *  DLASD7 is called from DLASD6.

 35 *

 36 *  Arguments

 37 *  =========

 38 *

 39 *  ICOMPQ  (input) INTEGER

 40 *          Specifies whether singular vectors are to be computed

 41 *          in compact form, as follows:

 42 *          = 0: Compute singular values only.

 43 *          = 1: Compute singular vectors of upper

 44 *               bidiagonal matrix in compact form.

 45 *

 46 *  NL     (input) INTEGER

 47 *         The row dimension of the upper block. NL >= 1.

 48 *

 49 *  NR     (input) INTEGER

 50 *         The row dimension of the lower block. NR >= 1.

 51 *

 52 *  SQRE   (input) INTEGER

 53 *         = 0: the lower block is an NR-by-NR square matrix.

 54 *         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.

 55 *

 56 *         The bidiagonal matrix has

 57 *         N = NL + NR + 1 rows and

 58 *         M = N + SQRE >= N columns.

 59 *

 60 *  K      (output) INTEGER

 61 *         Contains the dimension of the non-deflated matrix, this is

 62 *         the order of the related secular equation. 1 <= K <=N.

 63 *

 64 *  D      (input/output) DOUBLE PRECISION array, dimension ( N )

 65 *         On entry D contains the singular values of the two submatrices

 66 *         to be combined. On exit D contains the trailing (N-K) updated

 67 *         singular values (those which were deflated) sorted into

 68 *         increasing order.

 69 *

 70 *  Z      (output) DOUBLE PRECISION array, dimension ( M )

 71 *         On exit Z contains the updating row vector in the secular

 72 *         equation.

 73 *

 74 *  ZW     (workspace) DOUBLE PRECISION array, dimension ( M )

 75 *         Workspace for Z.

 76 *

 77 *  VF     (input/output) DOUBLE PRECISION array, dimension ( M )

 78 *         On entry, VF(1:NL+1) contains the first components of all

 79 *         right singular vectors of the upper block; and VF(NL+2:M)

 80 *         contains the first components of all right singular vectors

 81 *         of the lower block. On exit, VF contains the first components

 82 *         of all right singular vectors of the bidiagonal matrix.

 83 *

 84 *  VFW    (workspace) DOUBLE PRECISION array, dimension ( M )

 85 *         Workspace for VF.

 86 *

 87 *  VL     (input/output) DOUBLE PRECISION array, dimension ( M )

 88 *         On entry, VL(1:NL+1) contains the  last components of all

 89 *         right singular vectors of the upper block; and VL(NL+2:M)

 90 *         contains the last components of all right singular vectors

 91 *         of the lower block. On exit, VL contains the last components

 92 *         of all right singular vectors of the bidiagonal matrix.

 93 *

 94 *  VLW    (workspace) DOUBLE PRECISION array, dimension ( M )

 95 *         Workspace for VL.

 96 *

 97 *  ALPHA  (input) DOUBLE PRECISION

 98 *         Contains the diagonal element associated with the added row.

 99 *

100 *  BETA   (input) DOUBLE PRECISION

101 *         Contains the off-diagonal element associated with the added

102 *         row.

103 *

104 *  DSIGMA (output) DOUBLE PRECISION array, dimension ( N )

105 *         Contains a copy of the diagonal elements (K-1 singular values

106 *         and one zero) in the secular equation.

107 *

108 *  IDX    (workspace) INTEGER array, dimension ( N )

109 *         This will contain the permutation used to sort the contents of

110 *         D into ascending order.

111 *

112 *  IDXP   (workspace) INTEGER array, dimension ( N )

113 *         This will contain the permutation used to place deflated

114 *         values of D at the end of the array. On output IDXP(2:K)

115 *         points to the nondeflated D-values and IDXP(K+1:N)

116 *         points to the deflated singular values.

117 *

118 *  IDXQ   (input) INTEGER array, dimension ( N )

119 *         This contains the permutation which separately sorts the two

120 *         sub-problems in D into ascending order.  Note that entries in

121 *         the first half of this permutation must first be moved one

122 *         position backward; and entries in the second half

123 *         must first have NL+1 added to their values.

124 *

125 *  PERM   (output) INTEGER array, dimension ( N )

126 *         The permutations (from deflation and sorting) to be applied

127 *         to each singular block. Not referenced if ICOMPQ = 0.

128 *

129 *  GIVPTR (output) INTEGER

130 *         The number of Givens rotations which took place in this

131 *         subproblem. Not referenced if ICOMPQ = 0.

132 *

133 *  GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 )

134 *         Each pair of numbers indicates a pair of columns to take place

135 *         in a Givens rotation. Not referenced if ICOMPQ = 0.

136 *

137 *  LDGCOL (input) INTEGER

138 *         The leading dimension of GIVCOL, must be at least N.

139 *

140 *  GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )

141 *         Each number indicates the C or S value to be used in the

142 *         corresponding Givens rotation. Not referenced if ICOMPQ = 0.

143 *

144 *  LDGNUM (input) INTEGER

145 *         The leading dimension of GIVNUM, must be at least N.

146 *

147 *  C      (output) DOUBLE PRECISION

148 *         C contains garbage if SQRE =0 and the C-value of a Givens

149 *         rotation related to the right null space if SQRE = 1.

150 *

151 *  S      (output) DOUBLE PRECISION

152 *         S contains garbage if SQRE =0 and the S-value of a Givens

153 *         rotation related to the right null space if SQRE = 1.

154 *

155 *  INFO   (output) INTEGER

156 *         = 0:  successful exit.

157 *         < 0:  if INFO = -i, the i-th argument had an illegal value.

158 *

159 *  Further Details

160 *  ===============

161 *

162 *  Based on contributions by

163 *     Ming Gu and Huan Ren, Computer Science Division, University of

164 *     California at Berkeley, USA

165 *

166 *  =====================================================================

167 *

168 *     .. Parameters ..

169       DOUBLE PRECISION   ZERO, ONE, TWO, EIGHT

170       PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,

171      $                   EIGHT = 8.0D+0 )

172 *     ..

173 *     .. Local Scalars ..

174 *

175       INTEGER            I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, N,

176      $                   NLP1, NLP2

177       DOUBLE PRECISION   EPS, HLFTOL, TAU, TOL, Z1

178 *     ..

179 *     .. External Subroutines ..

180       EXTERNAL           DCOPY, DLAMRG, DROT, XERBLA

181 *     ..

182 *     .. External Functions ..

183       DOUBLE PRECISION   DLAMCH, DLAPY2

184       EXTERNAL           DLAMCH, DLAPY2

185 *     ..

186 *     .. Intrinsic Functions ..

187       INTRINSIC          ABS, MAX

188 *     ..

189 *     .. Executable Statements ..

190 *

191 *     Test the input parameters.

192 *

193       INFO = 0

194       N = NL + NR + 1

195       M = N + SQRE

196 *

197       IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN

198          INFO = -1

199       ELSE IF( NL.LT.1 ) THEN

200          INFO = -2

201       ELSE IF( NR.LT.1 ) THEN

202          INFO = -3

203       ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN

204          INFO = -4

205       ELSE IF( LDGCOL.LT.N ) THEN

206          INFO = -22

207       ELSE IF( LDGNUM.LT.N ) THEN

208          INFO = -24

209       END IF

210       IF( INFO.NE.0 ) THEN

211          CALL XERBLA( 'DLASD7', -INFO )

212          RETURN

213       END IF

214 *

215       NLP1 = NL + 1

216       NLP2 = NL + 2

217       IF( ICOMPQ.EQ.1 ) THEN

218          GIVPTR = 0

219       END IF

220 *

221 *     Generate the first part of the vector Z and move the singular

222 *     values in the first part of D one position backward.

223 *

224       Z1 = ALPHA*VL( NLP1 )

225       VL( NLP1 ) = ZERO

226       TAU = VF( NLP1 )

227       DO 10 I = NL, 1, -1

228          Z( I+1 ) = ALPHA*VL( I )

229          VL( I ) = ZERO

230          VF( I+1 ) = VF( I )

231          D( I+1 ) = D( I )

232          IDXQ( I+1 ) = IDXQ( I ) + 1

233    10 CONTINUE

234       VF( 1 ) = TAU

235 *

236 *     Generate the second part of the vector Z.

237 *

238       DO 20 I = NLP2, M

239          Z( I ) = BETA*VF( I )

240          VF( I ) = ZERO

241    20 CONTINUE

242 *

243 *     Sort the singular values into increasing order

244 *

245       DO 30 I = NLP2, N

246          IDXQ( I ) = IDXQ( I ) + NLP1

247    30 CONTINUE

248 *

249 *     DSIGMA, IDXC, IDXC, and ZW are used as storage space.

250 *

251       DO 40 I = 2, N

252          DSIGMA( I ) = D( IDXQ( I ) )

253          ZW( I ) = Z( IDXQ( I ) )

254          VFW( I ) = VF( IDXQ( I ) )

255          VLW( I ) = VL( IDXQ( I ) )

256    40 CONTINUE

257 *

258       CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )

259 *

260       DO 50 I = 2, N

261          IDXI = 1 + IDX( I )

262          D( I ) = DSIGMA( IDXI )

263          Z( I ) = ZW( IDXI )

264          VF( I ) = VFW( IDXI )

265          VL( I ) = VLW( IDXI )

266    50 CONTINUE

267 *

268 *     Calculate the allowable deflation tolerence

269 *

270       EPS = DLAMCH( 'Epsilon' )

271       TOL = MAX( ABS( ALPHA ), ABS( BETA ) )

272       TOL = EIGHT*EIGHT*EPS*MAX( ABS( D( N ) ), TOL )

273 *

274 *     There are 2 kinds of deflation -- first a value in the z-vector

275 *     is small, second two (or more) singular values are very close

276 *     together (their difference is small).

277 *

278 *     If the value in the z-vector is small, we simply permute the

279 *     array so that the corresponding singular value is moved to the

280 *     end.

281 *

282 *     If two values in the D-vector are close, we perform a two-sided

283 *     rotation designed to make one of the corresponding z-vector

284 *     entries zero, and then permute the array so that the deflated

285 *     singular value is moved to the end.

286 *

287 *     If there are multiple singular values then the problem deflates.

288 *     Here the number of equal singular values are found.  As each equal

289 *     singular value is found, an elementary reflector is computed to

290 *     rotate the corresponding singular subspace so that the

291 *     corresponding components of Z are zero in this new basis.

292 *

293       K = 1

294       K2 = N + 1

295       DO 60 J = 2, N

296          IF( ABS( Z( J ) ).LE.TOL ) THEN

297 *

298 *           Deflate due to small z component.

299 *

300             K2 = K2 - 1

301             IDXP( K2 ) = J

302             IF( J.EQ.N )

303      $         GO TO 100

304          ELSE

305             JPREV = J

306             GO TO 70

307          END IF

308    60 CONTINUE

309    70 CONTINUE

310       J = JPREV

311    80 CONTINUE

312       J = J + 1

313       IF( J.GT.N )

314      $   GO TO 90

315       IF( ABS( Z( J ) ).LE.TOL ) THEN

316 *

317 *        Deflate due to small z component.

318 *

319          K2 = K2 - 1

320          IDXP( K2 ) = J

321       ELSE

322 *

323 *        Check if singular values are close enough to allow deflation.

324 *

325          IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN

326 *

327 *           Deflation is possible.

328 *

329             S = Z( JPREV )

330             C = Z( J )

331 *

332 *           Find sqrt(a**2+b**2) without overflow or

333 *           destructive underflow.

334 *

335             TAU = DLAPY2( C, S )

336             Z( J ) = TAU

337             Z( JPREV ) = ZERO

338             C = C / TAU

339             S = -S / TAU

340 *

341 *           Record the appropriate Givens rotation

342 *

343             IF( ICOMPQ.EQ.1 ) THEN

344                GIVPTR = GIVPTR + 1

345                IDXJP = IDXQ( IDX( JPREV )+1 )

346                IDXJ = IDXQ( IDX( J )+1 )

347                IF( IDXJP.LE.NLP1 ) THEN

348                   IDXJP = IDXJP - 1

349                END IF

350                IF( IDXJ.LE.NLP1 ) THEN

351                   IDXJ = IDXJ - 1

352                END IF

353                GIVCOL( GIVPTR, 2 ) = IDXJP

354                GIVCOL( GIVPTR, 1 ) = IDXJ

355                GIVNUM( GIVPTR, 2 ) = C

356                GIVNUM( GIVPTR, 1 ) = S

357             END IF

358             CALL DROT( 1, VF( JPREV ), 1, VF( J ), 1, C, S )

359             CALL DROT( 1, VL( JPREV ), 1, VL( J ), 1, C, S )

360             K2 = K2 - 1

361             IDXP( K2 ) = JPREV

362             JPREV = J

363          ELSE

364             K = K + 1

365             ZW( K ) = Z( JPREV )

366             DSIGMA( K ) = D( JPREV )

367             IDXP( K ) = JPREV

368             JPREV = J

369          END IF

370       END IF

371       GO TO 80

372    90 CONTINUE

373 *

374 *     Record the last singular value.

375 *

376       K = K + 1

377       ZW( K ) = Z( JPREV )

378       DSIGMA( K ) = D( JPREV )

379       IDXP( K ) = JPREV

380 *

381   100 CONTINUE

382 *

383 *     Sort the singular values into DSIGMA. The singular values which

384 *     were not deflated go into the first K slots of DSIGMA, except

385 *     that DSIGMA(1) is treated separately.

386 *

387       DO 110 J = 2, N

388          JP = IDXP( J )

389          DSIGMA( J ) = D( JP )

390          VFW( J ) = VF( JP )

391          VLW( J ) = VL( JP )

392   110 CONTINUE

393       IF( ICOMPQ.EQ.1 ) THEN

394          DO 120 J = 2, N

395             JP = IDXP( J )

396             PERM( J ) = IDXQ( IDX( JP )+1 )

397             IF( PERM( J ).LE.NLP1 ) THEN

398                PERM( J ) = PERM( J ) - 1

399             END IF

400   120    CONTINUE

401       END IF

402 *

403 *     The deflated singular values go back into the last N - K slots of

404 *     D.

405 *

406       CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )

407 *

408 *     Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and

409 *     VL(M).

410 *

411       DSIGMA( 1 ) = ZERO

412       HLFTOL = TOL / TWO

413       IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )

414      $   DSIGMA( 2 ) = HLFTOL

415       IF( M.GT.N ) THEN

416          Z( 1 ) = DLAPY2( Z1, Z( M ) )

417          IF( Z( 1 ).LE.TOL ) THEN

418             C = ONE

419             S = ZERO

420             Z( 1 ) = TOL

421          ELSE

422             C = Z1 / Z( 1 )

423             S = -Z( M ) / Z( 1 )

424          END IF

425          CALL DROT( 1, VF( M ), 1, VF( 1 ), 1, C, S )

426          CALL DROT( 1, VL( M ), 1, VL( 1 ), 1, C, S )

427       ELSE

428          IF( ABS( Z1 ).LE.TOL ) THEN

429             Z( 1 ) = TOL

430          ELSE

431             Z( 1 ) = Z1

432          END IF

433       END IF

434 *

435 *     Restore Z, VF, and VL.

436 *

437       CALL DCOPY( K-1, ZW( 2 ), 1, Z( 2 ), 1 )

438       CALL DCOPY( N-1, VFW( 2 ), 1, VF( 2 ), 1 )

439       CALL DCOPY( N-1, VLW( 2 ), 1, VL( 2 ), 1 )

440 *

441       RETURN

442 *

443 *     End of DLASD7

444 *

445       END