dlaqr2.f (flens/lapack/interface/ref

  1       SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,

  2      $                   IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,

  3      $                   LDT, NV, WV, LDWV, WORK, LWORK )

  4 *

  5 *  -- LAPACK auxiliary routine (version 3.2.2)                        --

  6 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..

  7 *  -- June 2010                                                       --

  8 *

  9 *     .. Scalar Arguments ..

 10       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,

 11      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW

 12       LOGICAL            WANTT, WANTZ

 13 *     ..

 14 *     .. Array Arguments ..

 15       DOUBLE PRECISION   H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),

 16      $                   V( LDV, * ), WORK( * ), WV( LDWV, * ),

 17      $                   Z( LDZ, * )

 18 *     ..

 19 *

 20 *     This subroutine is identical to DLAQR3 except that it avoids

 21 *     recursion by calling DLAHQR instead of DLAQR4.

 22 *

 23 *

 24 *     ******************************************************************

 25 *     Aggressive early deflation:

 26 *

 27 *     This subroutine accepts as input an upper Hessenberg matrix

 28 *     H and performs an orthogonal similarity transformation

 29 *     designed to detect and deflate fully converged eigenvalues from

 30 *     a trailing principal submatrix.  On output H has been over-

 31 *     written by a new Hessenberg matrix that is a perturbation of

 32 *     an orthogonal similarity transformation of H.  It is to be

 33 *     hoped that the final version of H has many zero subdiagonal

 34 *     entries.

 35 *

 36 *     ******************************************************************

 37 *     WANTT   (input) LOGICAL

 38 *          If .TRUE., then the Hessenberg matrix H is fully updated

 39 *          so that the quasi-triangular Schur factor may be

 40 *          computed (in cooperation with the calling subroutine).

 41 *          If .FALSE., then only enough of H is updated to preserve

 42 *          the eigenvalues.

 43 *

 44 *     WANTZ   (input) LOGICAL

 45 *          If .TRUE., then the orthogonal matrix Z is updated so

 46 *          so that the orthogonal Schur factor may be computed

 47 *          (in cooperation with the calling subroutine).

 48 *          If .FALSE., then Z is not referenced.

 49 *

 50 *     N       (input) INTEGER

 51 *          The order of the matrix H and (if WANTZ is .TRUE.) the

 52 *          order of the orthogonal matrix Z.

 53 *

 54 *     KTOP    (input) INTEGER

 55 *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.

 56 *          KBOT and KTOP together determine an isolated block

 57 *          along the diagonal of the Hessenberg matrix.

 58 *

 59 *     KBOT    (input) INTEGER

 60 *          It is assumed without a check that either

 61 *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together

 62 *          determine an isolated block along the diagonal of the

 63 *          Hessenberg matrix.

 64 *

 65 *     NW      (input) INTEGER

 66 *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).

 67 *

 68 *     H       (input/output) DOUBLE PRECISION array, dimension (LDH,N)

 69 *          On input the initial N-by-N section of H stores the

 70 *          Hessenberg matrix undergoing aggressive early deflation.

 71 *          On output H has been transformed by an orthogonal

 72 *          similarity transformation, perturbed, and the returned

 73 *          to Hessenberg form that (it is to be hoped) has some

 74 *          zero subdiagonal entries.

 75 *

 76 *     LDH     (input) integer

 77 *          Leading dimension of H just as declared in the calling

 78 *          subroutine.  N .LE. LDH

 79 *

 80 *     ILOZ    (input) INTEGER

 81 *     IHIZ    (input) INTEGER

 82 *          Specify the rows of Z to which transformations must be

 83 *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.

 84 *

 85 *     Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)

 86 *          IF WANTZ is .TRUE., then on output, the orthogonal

 87 *          similarity transformation mentioned above has been

 88 *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.

 89 *          If WANTZ is .FALSE., then Z is unreferenced.

 90 *

 91 *     LDZ     (input) integer

 92 *          The leading dimension of Z just as declared in the

 93 *          calling subroutine.  1 .LE. LDZ.

 94 *

 95 *     NS      (output) integer

 96 *          The number of unconverged (ie approximate) eigenvalues

 97 *          returned in SR and SI that may be used as shifts by the

 98 *          calling subroutine.

 99 *

100 *     ND      (output) integer

101 *          The number of converged eigenvalues uncovered by this

102 *          subroutine.

103 *

104 *     SR      (output) DOUBLE PRECISION array, dimension (KBOT)

105 *     SI      (output) DOUBLE PRECISION array, dimension (KBOT)

106 *          On output, the real and imaginary parts of approximate

107 *          eigenvalues that may be used for shifts are stored in

108 *          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and

109 *          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.

110 *          The real and imaginary parts of converged eigenvalues

111 *          are stored in SR(KBOT-ND+1) through SR(KBOT) and

112 *          SI(KBOT-ND+1) through SI(KBOT), respectively.

113 *

114 *     V       (workspace) DOUBLE PRECISION array, dimension (LDV,NW)

115 *          An NW-by-NW work array.

116 *

117 *     LDV     (input) integer scalar

118 *          The leading dimension of V just as declared in the

119 *          calling subroutine.  NW .LE. LDV

120 *

121 *     NH      (input) integer scalar

122 *          The number of columns of T.  NH.GE.NW.

123 *

124 *     T       (workspace) DOUBLE PRECISION array, dimension (LDT,NW)

125 *

126 *     LDT     (input) integer

127 *          The leading dimension of T just as declared in the

128 *          calling subroutine.  NW .LE. LDT

129 *

130 *     NV      (input) integer

131 *          The number of rows of work array WV available for

132 *          workspace.  NV.GE.NW.

133 *

134 *     WV      (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)

135 *

136 *     LDWV    (input) integer

137 *          The leading dimension of W just as declared in the

138 *          calling subroutine.  NW .LE. LDV

139 *

140 *     WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)

141 *          On exit, WORK(1) is set to an estimate of the optimal value

142 *          of LWORK for the given values of N, NW, KTOP and KBOT.

143 *

144 *     LWORK   (input) integer

145 *          The dimension of the work array WORK.  LWORK = 2*NW

146 *          suffices, but greater efficiency may result from larger

147 *          values of LWORK.

148 *

149 *          If LWORK = -1, then a workspace query is assumed; DLAQR2

150 *          only estimates the optimal workspace size for the given

151 *          values of N, NW, KTOP and KBOT.  The estimate is returned

152 *          in WORK(1).  No error message related to LWORK is issued

153 *          by XERBLA.  Neither H nor Z are accessed.

154 *

155 *     ================================================================

156 *     Based on contributions by

157 *        Karen Braman and Ralph Byers, Department of Mathematics,

158 *        University of Kansas, USA

159 *

160 *     ================================================================

161 *     .. Parameters ..

162       DOUBLE PRECISION   ZERO, ONE

163       PARAMETER          ( ZERO = 0.0d0, ONE = 1.0d0 )

164 *     ..

165 *     .. Local Scalars ..

166       DOUBLE PRECISION   AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,

167      $                   SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP

168       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,

169      $                   KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2,

170      $                   LWKOPT

171       LOGICAL            BULGE, SORTED

172 *     ..

173 *     .. External Functions ..

174       DOUBLE PRECISION   DLAMCH

175       EXTERNAL           DLAMCH

176 *     ..

177 *     .. External Subroutines ..

178       EXTERNAL           DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,

179      $                   DLANV2, DLARF, DLARFG, DLASET, DORMHR, DTREXC

180 *     ..

181 *     .. Intrinsic Functions ..

182       INTRINSIC          ABS, DBLE, INT, MAX, MIN, SQRT

183 *     ..

184 *     .. Executable Statements ..

185 *

186 *     ==== Estimate optimal workspace. ====

187 *

188       JW = MIN( NW, KBOT-KTOP+1 )

189       IF( JW.LE.2 ) THEN

190          LWKOPT = 1

191       ELSE

192 *

193 *        ==== Workspace query call to DGEHRD ====

194 *

195          CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )

196          LWK1 = INT( WORK( 1 ) )

197 *

198 *        ==== Workspace query call to DORMHR ====

199 *

200          CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,

201      $                WORK, -1, INFO )

202          LWK2 = INT( WORK( 1 ) )

203 *

204 *        ==== Optimal workspace ====

205 *

206          LWKOPT = JW + MAX( LWK1, LWK2 )

207       END IF

208 *

209 *     ==== Quick return in case of workspace query. ====

210 *

211       IF( LWORK.EQ.-1 ) THEN

212          WORK( 1 ) = DBLE( LWKOPT )

213          RETURN

214       END IF

215 *

216 *     ==== Nothing to do ...

217 *     ... for an empty active block ... ====

218       NS = 0

219       ND = 0

220       WORK( 1 ) = ONE

221       IF( KTOP.GT.KBOT )

222      $   RETURN

223 *     ... nor for an empty deflation window. ====

224       IF( NW.LT.1 )

225      $   RETURN

226 *

227 *     ==== Machine constants ====

228 *

229       SAFMIN = DLAMCH( 'SAFE MINIMUM' )

230       SAFMAX = ONE / SAFMIN

231       CALL DLABAD( SAFMIN, SAFMAX )

232       ULP = DLAMCH( 'PRECISION' )

233       SMLNUM = SAFMIN*( DBLE( N ) / ULP )

234 *

235 *     ==== Setup deflation window ====

236 *

237       JW = MIN( NW, KBOT-KTOP+1 )

238       KWTOP = KBOT - JW + 1

239       IF( KWTOP.EQ.KTOP ) THEN

240          S = ZERO

241       ELSE

242          S = H( KWTOP, KWTOP-1 )

243       END IF

244 *

245       IF( KBOT.EQ.KWTOP ) THEN

246 *

247 *        ==== 1-by-1 deflation window: not much to do ====

248 *

249          SR( KWTOP ) = H( KWTOP, KWTOP )

250          SI( KWTOP ) = ZERO

251          NS = 1

252          ND = 0

253          IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )

254      $        THEN

255             NS = 0

256             ND = 1

257             IF( KWTOP.GT.KTOP )

258      $         H( KWTOP, KWTOP-1 ) = ZERO

259          END IF

260          WORK( 1 ) = ONE

261          RETURN

262       END IF

263 *

264 *     ==== Convert to spike-triangular form.  (In case of a

265 *     .    rare QR failure, this routine continues to do

266 *     .    aggressive early deflation using that part of

267 *     .    the deflation window that converged using INFQR

268 *     .    here and there to keep track.) ====

269 *

270       CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )

271       CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )

272 *

273       CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )

274       CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),

275      $             SI( KWTOP ), 1, JW, V, LDV, INFQR )

276 *

277 *     ==== DTREXC needs a clean margin near the diagonal ====

278 *

279       DO 10 J = 1, JW - 3

280          T( J+2, J ) = ZERO

281          T( J+3, J ) = ZERO

282    10 CONTINUE

283       IF( JW.GT.2 )

284      $   T( JW, JW-2 ) = ZERO

285 *

286 *     ==== Deflation detection loop ====

287 *

288       NS = JW

289       ILST = INFQR + 1

290    20 CONTINUE

291       IF( ILST.LE.NS ) THEN

292          IF( NS.EQ.1 ) THEN

293             BULGE = .FALSE.

294          ELSE

295             BULGE = T( NS, NS-1 ).NE.ZERO

296          END IF

297 *

298 *        ==== Small spike tip test for deflation ====

299 *

300          IF( .NOT.BULGE ) THEN

301 *

302 *           ==== Real eigenvalue ====

303 *

304             FOO = ABS( T( NS, NS ) )

305             IF( FOO.EQ.ZERO )

306      $         FOO = ABS( S )

307             IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN

308 *

309 *              ==== Deflatable ====

310 *

311                NS = NS - 1

312             ELSE

313 *

314 *              ==== Undeflatable.   Move it up out of the way.

315 *              .    (DTREXC can not fail in this case.) ====

316 *

317                IFST = NS

318                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,

319      $                      INFO )

320                ILST = ILST + 1

321             END IF

322          ELSE

323 *

324 *           ==== Complex conjugate pair ====

325 *

326             FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*

327      $            SQRT( ABS( T( NS-1, NS ) ) )

328             IF( FOO.EQ.ZERO )

329      $         FOO = ABS( S )

330             IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.

331      $          MAX( SMLNUM, ULP*FOO ) ) THEN

332 *

333 *              ==== Deflatable ====

334 *

335                NS = NS - 2

336             ELSE

337 *

338 *              ==== Undeflatable. Move them up out of the way.

339 *              .    Fortunately, DTREXC does the right thing with

340 *              .    ILST in case of a rare exchange failure. ====

341 *

342                IFST = NS

343                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,

344      $                      INFO )

345                ILST = ILST + 2

346             END IF

347          END IF

348 *

349 *        ==== End deflation detection loop ====

350 *

351          GO TO 20

352       END IF

353 *

354 *        ==== Return to Hessenberg form ====

355 *

356       IF( NS.EQ.0 )

357      $   S = ZERO

358 *

359       IF( NS.LT.JW ) THEN

360 *

361 *        ==== sorting diagonal blocks of T improves accuracy for

362 *        .    graded matrices.  Bubble sort deals well with

363 *        .    exchange failures. ====

364 *

365          SORTED = .false.

366          I = NS + 1

367    30    CONTINUE

368          IF( SORTED )

369      $      GO TO 50

370          SORTED = .true.

371 *

372          KEND = I - 1

373          I = INFQR + 1

374          IF( I.EQ.NS ) THEN

375             K = I + 1

376          ELSE IF( T( I+1, I ).EQ.ZERO ) THEN

377             K = I + 1

378          ELSE

379             K = I + 2

380          END IF

381    40    CONTINUE

382          IF( K.LE.KEND ) THEN

383             IF( K.EQ.I+1 ) THEN

384                EVI = ABS( T( I, I ) )

385             ELSE

386                EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*

387      $               SQRT( ABS( T( I, I+1 ) ) )

388             END IF

389 *

390             IF( K.EQ.KEND ) THEN

391                EVK = ABS( T( K, K ) )

392             ELSE IF( T( K+1, K ).EQ.ZERO ) THEN

393                EVK = ABS( T( K, K ) )

394             ELSE

395                EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*

396      $               SQRT( ABS( T( K, K+1 ) ) )

397             END IF

398 *

399             IF( EVI.GE.EVK ) THEN

400                I = K

401             ELSE

402                SORTED = .false.

403                IFST = I

404                ILST = K

405                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,

406      $                      INFO )

407                IF( INFO.EQ.0 ) THEN

408                   I = ILST

409                ELSE

410                   I = K

411                END IF

412             END IF

413             IF( I.EQ.KEND ) THEN

414                K = I + 1

415             ELSE IF( T( I+1, I ).EQ.ZERO ) THEN

416                K = I + 1

417             ELSE

418                K = I + 2

419             END IF

420             GO TO 40

421          END IF

422          GO TO 30

423    50    CONTINUE

424       END IF

425 *

426 *     ==== Restore shift/eigenvalue array from T ====

427 *

428       I = JW

429    60 CONTINUE

430       IF( I.GE.INFQR+1 ) THEN

431          IF( I.EQ.INFQR+1 ) THEN

432             SR( KWTOP+I-1 ) = T( I, I )

433             SI( KWTOP+I-1 ) = ZERO

434             I = I - 1

435          ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN

436             SR( KWTOP+I-1 ) = T( I, I )

437             SI( KWTOP+I-1 ) = ZERO

438             I = I - 1

439          ELSE

440             AA = T( I-1, I-1 )

441             CC = T( I, I-1 )

442             BB = T( I-1, I )

443             DD = T( I, I )

444             CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),

445      $                   SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),

446      $                   SI( KWTOP+I-1 ), CS, SN )

447             I = I - 2

448          END IF

449          GO TO 60

450       END IF

451 *

452       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN

453          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN

454 *

455 *           ==== Reflect spike back into lower triangle ====

456 *

457             CALL DCOPY( NS, V, LDV, WORK, 1 )

458             BETA = WORK( 1 )

459             CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )

460             WORK( 1 ) = ONE

461 *

462             CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )

463 *

464             CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,

465      $                  WORK( JW+1 ) )

466             CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,

467      $                  WORK( JW+1 ) )

468             CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,

469      $                  WORK( JW+1 ) )

470 *

471             CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),

472      $                   LWORK-JW, INFO )

473          END IF

474 *

475 *        ==== Copy updated reduced window into place ====

476 *

477          IF( KWTOP.GT.1 )

478      $      H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )

479          CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )

480          CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),

481      $               LDH+1 )

482 *

483 *        ==== Accumulate orthogonal matrix in order update

484 *        .    H and Z, if requested.  ====

485 *

486          IF( NS.GT.1 .AND. S.NE.ZERO )

487      $      CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,

488      $                   WORK( JW+1 ), LWORK-JW, INFO )

489 *

490 *        ==== Update vertical slab in H ====

491 *

492          IF( WANTT ) THEN

493             LTOP = 1

494          ELSE

495             LTOP = KTOP

496          END IF

497          DO 70 KROW = LTOP, KWTOP - 1, NV

498             KLN = MIN( NV, KWTOP-KROW )

499             CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),

500      $                  LDH, V, LDV, ZERO, WV, LDWV )

501             CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )

502    70    CONTINUE

503 *

504 *        ==== Update horizontal slab in H ====

505 *

506          IF( WANTT ) THEN

507             DO 80 KCOL = KBOT + 1, N, NH

508                KLN = MIN( NH, N-KCOL+1 )

509                CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,

510      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )

511                CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),

512      $                      LDH )

513    80       CONTINUE

514          END IF

515 *

516 *        ==== Update vertical slab in Z ====

517 *

518          IF( WANTZ ) THEN

519             DO 90 KROW = ILOZ, IHIZ, NV

520                KLN = MIN( NV, IHIZ-KROW+1 )

521                CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),

522      $                     LDZ, V, LDV, ZERO, WV, LDWV )

523                CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),

524      $                      LDZ )

525    90       CONTINUE

526          END IF

527       END IF

528 *

529 *     ==== Return the number of deflations ... ====

530 *

531       ND = JW - NS

532 *

533 *     ==== ... and the number of shifts. (Subtracting

534 *     .    INFQR from the spike length takes care

535 *     .    of the case of a rare QR failure while

536 *     .    calculating eigenvalues of the deflation

537 *     .    window.)  ====

538 *

539       NS = NS - INFQR

540 *

541 *      ==== Return optimal workspace. ====

542 *

543       WORK( 1 ) = DBLE( LWKOPT )

544 *

545 *     ==== End of DLAQR2 ====

546 *

547       END