dlaqr3.f (flens/lapack/interface/ref

  1       SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,

  2      $                   IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,

  3      $                   LDT, NV, WV, LDWV, WORK, LWORK )

  4 *

  5 *  -- LAPACK auxiliary routine (version 3.2.2)                        --

  6 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..

  7 *  -- June 2010                                                       --

  8 *

  9 *     .. Scalar Arguments ..

 10       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,

 11      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW

 12       LOGICAL            WANTT, WANTZ

 13 *     ..

 14 *     .. Array Arguments ..

 15       DOUBLE PRECISION   H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),

 16      $                   V( LDV, * ), WORK( * ), WV( LDWV, * ),

 17      $                   Z( LDZ, * )

 18 *     ..

 19 *

 20 *     ******************************************************************

 21 *     Aggressive early deflation:

 22 *

 23 *     This subroutine accepts as input an upper Hessenberg matrix

 24 *     H and performs an orthogonal similarity transformation

 25 *     designed to detect and deflate fully converged eigenvalues from

 26 *     a trailing principal submatrix.  On output H has been over-

 27 *     written by a new Hessenberg matrix that is a perturbation of

 28 *     an orthogonal similarity transformation of H.  It is to be

 29 *     hoped that the final version of H has many zero subdiagonal

 30 *     entries.

 31 *

 32 *     ******************************************************************

 33 *     WANTT   (input) LOGICAL

 34 *          If .TRUE., then the Hessenberg matrix H is fully updated

 35 *          so that the quasi-triangular Schur factor may be

 36 *          computed (in cooperation with the calling subroutine).

 37 *          If .FALSE., then only enough of H is updated to preserve

 38 *          the eigenvalues.

 39 *

 40 *     WANTZ   (input) LOGICAL

 41 *          If .TRUE., then the orthogonal matrix Z is updated so

 42 *          so that the orthogonal Schur factor may be computed

 43 *          (in cooperation with the calling subroutine).

 44 *          If .FALSE., then Z is not referenced.

 45 *

 46 *     N       (input) INTEGER

 47 *          The order of the matrix H and (if WANTZ is .TRUE.) the

 48 *          order of the orthogonal matrix Z.

 49 *

 50 *     KTOP    (input) INTEGER

 51 *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.

 52 *          KBOT and KTOP together determine an isolated block

 53 *          along the diagonal of the Hessenberg matrix.

 54 *

 55 *     KBOT    (input) INTEGER

 56 *          It is assumed without a check that either

 57 *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together

 58 *          determine an isolated block along the diagonal of the

 59 *          Hessenberg matrix.

 60 *

 61 *     NW      (input) INTEGER

 62 *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).

 63 *

 64 *     H       (input/output) DOUBLE PRECISION array, dimension (LDH,N)

 65 *          On input the initial N-by-N section of H stores the

 66 *          Hessenberg matrix undergoing aggressive early deflation.

 67 *          On output H has been transformed by an orthogonal

 68 *          similarity transformation, perturbed, and the returned

 69 *          to Hessenberg form that (it is to be hoped) has some

 70 *          zero subdiagonal entries.

 71 *

 72 *     LDH     (input) integer

 73 *          Leading dimension of H just as declared in the calling

 74 *          subroutine.  N .LE. LDH

 75 *

 76 *     ILOZ    (input) INTEGER

 77 *     IHIZ    (input) INTEGER

 78 *          Specify the rows of Z to which transformations must be

 79 *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.

 80 *

 81 *     Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)

 82 *          IF WANTZ is .TRUE., then on output, the orthogonal

 83 *          similarity transformation mentioned above has been

 84 *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.

 85 *          If WANTZ is .FALSE., then Z is unreferenced.

 86 *

 87 *     LDZ     (input) integer

 88 *          The leading dimension of Z just as declared in the

 89 *          calling subroutine.  1 .LE. LDZ.

 90 *

 91 *     NS      (output) integer

 92 *          The number of unconverged (ie approximate) eigenvalues

 93 *          returned in SR and SI that may be used as shifts by the

 94 *          calling subroutine.

 95 *

 96 *     ND      (output) integer

 97 *          The number of converged eigenvalues uncovered by this

 98 *          subroutine.

 99 *

100 *     SR      (output) DOUBLE PRECISION array, dimension (KBOT)

101 *     SI      (output) DOUBLE PRECISION array, dimension (KBOT)

102 *          On output, the real and imaginary parts of approximate

103 *          eigenvalues that may be used for shifts are stored in

104 *          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and

105 *          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.

106 *          The real and imaginary parts of converged eigenvalues

107 *          are stored in SR(KBOT-ND+1) through SR(KBOT) and

108 *          SI(KBOT-ND+1) through SI(KBOT), respectively.

109 *

110 *     V       (workspace) DOUBLE PRECISION array, dimension (LDV,NW)

111 *          An NW-by-NW work array.

112 *

113 *     LDV     (input) integer scalar

114 *          The leading dimension of V just as declared in the

115 *          calling subroutine.  NW .LE. LDV

116 *

117 *     NH      (input) integer scalar

118 *          The number of columns of T.  NH.GE.NW.

119 *

120 *     T       (workspace) DOUBLE PRECISION array, dimension (LDT,NW)

121 *

122 *     LDT     (input) integer

123 *          The leading dimension of T just as declared in the

124 *          calling subroutine.  NW .LE. LDT

125 *

126 *     NV      (input) integer

127 *          The number of rows of work array WV available for

128 *          workspace.  NV.GE.NW.

129 *

130 *     WV      (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)

131 *

132 *     LDWV    (input) integer

133 *          The leading dimension of W just as declared in the

134 *          calling subroutine.  NW .LE. LDV

135 *

136 *     WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)

137 *          On exit, WORK(1) is set to an estimate of the optimal value

138 *          of LWORK for the given values of N, NW, KTOP and KBOT.

139 *

140 *     LWORK   (input) integer

141 *          The dimension of the work array WORK.  LWORK = 2*NW

142 *          suffices, but greater efficiency may result from larger

143 *          values of LWORK.

144 *

145 *          If LWORK = -1, then a workspace query is assumed; DLAQR3

146 *          only estimates the optimal workspace size for the given

147 *          values of N, NW, KTOP and KBOT.  The estimate is returned

148 *          in WORK(1).  No error message related to LWORK is issued

149 *          by XERBLA.  Neither H nor Z are accessed.

150 *

151 *     ================================================================

152 *     Based on contributions by

153 *        Karen Braman and Ralph Byers, Department of Mathematics,

154 *        University of Kansas, USA

155 *

156 *     ================================================================

157 *     .. Parameters ..

158       DOUBLE PRECISION   ZERO, ONE

159       PARAMETER          ( ZERO = 0.0d0, ONE = 1.0d0 )

160 *     ..

161 *     .. Local Scalars ..

162       DOUBLE PRECISION   AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,

163      $                   SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP

164       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,

165      $                   KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,

166      $                   LWKOPT, NMIN

167       LOGICAL            BULGE, SORTED

168 *     ..

169 *     .. External Functions ..

170       DOUBLE PRECISION   DLAMCH

171       INTEGER            ILAENV

172       EXTERNAL           DLAMCH, ILAENV

173 *     ..

174 *     .. External Subroutines ..

175       EXTERNAL           DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,

176      $                   DLANV2, DLAQR4, DLARF, DLARFG, DLASET, DORMHR,

177      $                   DTREXC

178 *     ..

179 *     .. Intrinsic Functions ..

180       INTRINSIC          ABS, DBLE, INT, MAX, MIN, SQRT

181 *     ..

182 *     .. Executable Statements ..

183 *

184 *     ==== Estimate optimal workspace. ====

185 *

186       JW = MIN( NW, KBOT-KTOP+1 )

187       IF( JW.LE.2 ) THEN

188          LWKOPT = 1

189       ELSE

190 *

191 *        ==== Workspace query call to DGEHRD ====

192 *

193          CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )

194          LWK1 = INT( WORK( 1 ) )

195 *

196 *        ==== Workspace query call to DORMHR ====

197 *

198          CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,

199      $                WORK, -1, INFO )

200          LWK2 = INT( WORK( 1 ) )

201 *

202 *        ==== Workspace query call to DLAQR4 ====

203 *

204          CALL DLAQR4( .true., .true., JW, 1, JW, T, LDT, SR, SI, 1, JW,

205      $                V, LDV, WORK, -1, INFQR )

206          LWK3 = INT( WORK( 1 ) )

207 *

208 *        ==== Optimal workspace ====

209 *

210          LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 )

211       END IF

212 *

213 *     ==== Quick return in case of workspace query. ====

214 *

215       IF( LWORK.EQ.-1 ) THEN

216          WORK( 1 ) = DBLE( LWKOPT )

217          RETURN

218       END IF

219 *

220 *     ==== Nothing to do ...

221 *     ... for an empty active block ... ====

222       NS = 0

223       ND = 0

224       WORK( 1 ) = ONE

225       IF( KTOP.GT.KBOT )

226      $   RETURN

227 *     ... nor for an empty deflation window. ====

228       IF( NW.LT.1 )

229      $   RETURN

230 *

231 *     ==== Machine constants ====

232 *

233       SAFMIN = DLAMCH( 'SAFE MINIMUM' )

234       SAFMAX = ONE / SAFMIN

235       CALL DLABAD( SAFMIN, SAFMAX )

236       ULP = DLAMCH( 'PRECISION' )

237       SMLNUM = SAFMIN*( DBLE( N ) / ULP )

238 *

239 *     ==== Setup deflation window ====

240 *

241       JW = MIN( NW, KBOT-KTOP+1 )

242       KWTOP = KBOT - JW + 1

243       IF( KWTOP.EQ.KTOP ) THEN

244          S = ZERO

245       ELSE

246          S = H( KWTOP, KWTOP-1 )

247       END IF

248 *

249       IF( KBOT.EQ.KWTOP ) THEN

250 *

251 *        ==== 1-by-1 deflation window: not much to do ====

252 *

253          SR( KWTOP ) = H( KWTOP, KWTOP )

254          SI( KWTOP ) = ZERO

255          NS = 1

256          ND = 0

257          IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )

258      $        THEN

259             NS = 0

260             ND = 1

261             IF( KWTOP.GT.KTOP )

262      $         H( KWTOP, KWTOP-1 ) = ZERO

263          END IF

264          WORK( 1 ) = ONE

265          RETURN

266       END IF

267 *

268 *     ==== Convert to spike-triangular form.  (In case of a

269 *     .    rare QR failure, this routine continues to do

270 *     .    aggressive early deflation using that part of

271 *     .    the deflation window that converged using INFQR

272 *     .    here and there to keep track.) ====

273 *

274       CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )

275       CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )

276 *

277       CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )

278       NMIN = ILAENV( 12, 'DLAQR3', 'SV', JW, 1, JW, LWORK )

279       IF( JW.GT.NMIN ) THEN

280          CALL DLAQR4( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),

281      $                SI( KWTOP ), 1, JW, V, LDV, WORK, LWORK, INFQR )

282       ELSE

283          CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),

284      $                SI( KWTOP ), 1, JW, V, LDV, INFQR )

285       END IF

286 *

287 *     ==== DTREXC needs a clean margin near the diagonal ====

288 *

289       DO 10 J = 1, JW - 3

290          T( J+2, J ) = ZERO

291          T( J+3, J ) = ZERO

292    10 CONTINUE

293       IF( JW.GT.2 )

294      $   T( JW, JW-2 ) = ZERO

295 *

296 *     ==== Deflation detection loop ====

297 *

298       NS = JW

299       ILST = INFQR + 1

300    20 CONTINUE

301       IF( ILST.LE.NS ) THEN

302          IF( NS.EQ.1 ) THEN

303             BULGE = .FALSE.

304          ELSE

305             BULGE = T( NS, NS-1 ).NE.ZERO

306          END IF

307 *

308 *        ==== Small spike tip test for deflation ====

309 *

310          IF( .NOT.BULGE ) THEN

311 *

312 *           ==== Real eigenvalue ====

313 *

314             FOO = ABS( T( NS, NS ) )

315             IF( FOO.EQ.ZERO )

316      $         FOO = ABS( S )

317             IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN

318 *

319 *              ==== Deflatable ====

320 *

321                NS = NS - 1

322             ELSE

323 *

324 *              ==== Undeflatable.   Move it up out of the way.

325 *              .    (DTREXC can not fail in this case.) ====

326 *

327                IFST = NS

328                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,

329      $                      INFO )

330                ILST = ILST + 1

331             END IF

332          ELSE

333 *

334 *           ==== Complex conjugate pair ====

335 *

336             FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*

337      $            SQRT( ABS( T( NS-1, NS ) ) )

338             IF( FOO.EQ.ZERO )

339      $         FOO = ABS( S )

340             IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.

341      $          MAX( SMLNUM, ULP*FOO ) ) THEN

342 *

343 *              ==== Deflatable ====

344 *

345                NS = NS - 2

346             ELSE

347 *

348 *              ==== Undeflatable. Move them up out of the way.

349 *              .    Fortunately, DTREXC does the right thing with

350 *              .    ILST in case of a rare exchange failure. ====

351 *

352                IFST = NS

353                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,

354      $                      INFO )

355                ILST = ILST + 2

356             END IF

357          END IF

358 *

359 *        ==== End deflation detection loop ====

360 *

361          GO TO 20

362       END IF

363 *

364 *        ==== Return to Hessenberg form ====

365 *

366       IF( NS.EQ.0 )

367      $   S = ZERO

368 *

369       IF( NS.LT.JW ) THEN

370 *

371 *        ==== sorting diagonal blocks of T improves accuracy for

372 *        .    graded matrices.  Bubble sort deals well with

373 *        .    exchange failures. ====

374 *

375          SORTED = .false.

376          I = NS + 1

377    30    CONTINUE

378          IF( SORTED )

379      $      GO TO 50

380          SORTED = .true.

381 *

382          KEND = I - 1

383          I = INFQR + 1

384          IF( I.EQ.NS ) THEN

385             K = I + 1

386          ELSE IF( T( I+1, I ).EQ.ZERO ) THEN

387             K = I + 1

388          ELSE

389             K = I + 2

390          END IF

391    40    CONTINUE

392          IF( K.LE.KEND ) THEN

393             IF( K.EQ.I+1 ) THEN

394                EVI = ABS( T( I, I ) )

395             ELSE

396                EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*

397      $               SQRT( ABS( T( I, I+1 ) ) )

398             END IF

399 *

400             IF( K.EQ.KEND ) THEN

401                EVK = ABS( T( K, K ) )

402             ELSE IF( T( K+1, K ).EQ.ZERO ) THEN

403                EVK = ABS( T( K, K ) )

404             ELSE

405                EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*

406      $               SQRT( ABS( T( K, K+1 ) ) )

407             END IF

408 *

409             IF( EVI.GE.EVK ) THEN

410                I = K

411             ELSE

412                SORTED = .false.

413                IFST = I

414                ILST = K

415                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,

416      $                      INFO )

417                IF( INFO.EQ.0 ) THEN

418                   I = ILST

419                ELSE

420                   I = K

421                END IF

422             END IF

423             IF( I.EQ.KEND ) THEN

424                K = I + 1

425             ELSE IF( T( I+1, I ).EQ.ZERO ) THEN

426                K = I + 1

427             ELSE

428                K = I + 2

429             END IF

430             GO TO 40

431          END IF

432          GO TO 30

433    50    CONTINUE

434       END IF

435 *

436 *     ==== Restore shift/eigenvalue array from T ====

437 *

438       I = JW

439    60 CONTINUE

440       IF( I.GE.INFQR+1 ) THEN

441          IF( I.EQ.INFQR+1 ) THEN

442             SR( KWTOP+I-1 ) = T( I, I )

443             SI( KWTOP+I-1 ) = ZERO

444             I = I - 1

445          ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN

446             SR( KWTOP+I-1 ) = T( I, I )

447             SI( KWTOP+I-1 ) = ZERO

448             I = I - 1

449          ELSE

450             AA = T( I-1, I-1 )

451             CC = T( I, I-1 )

452             BB = T( I-1, I )

453             DD = T( I, I )

454             CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),

455      $                   SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),

456      $                   SI( KWTOP+I-1 ), CS, SN )

457             I = I - 2

458          END IF

459          GO TO 60

460       END IF

461 *

462       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN

463          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN

464 *

465 *           ==== Reflect spike back into lower triangle ====

466 *

467             CALL DCOPY( NS, V, LDV, WORK, 1 )

468             BETA = WORK( 1 )

469             CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )

470             WORK( 1 ) = ONE

471 *

472             CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )

473 *

474             CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,

475      $                  WORK( JW+1 ) )

476             CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,

477      $                  WORK( JW+1 ) )

478             CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,

479      $                  WORK( JW+1 ) )

480 *

481             CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),

482      $                   LWORK-JW, INFO )

483          END IF

484 *

485 *        ==== Copy updated reduced window into place ====

486 *

487          IF( KWTOP.GT.1 )

488      $      H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )

489          CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )

490          CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),

491      $               LDH+1 )

492 *

493 *        ==== Accumulate orthogonal matrix in order update

494 *        .    H and Z, if requested.  ====

495 *

496          IF( NS.GT.1 .AND. S.NE.ZERO )

497      $      CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,

498      $                   WORK( JW+1 ), LWORK-JW, INFO )

499 *

500 *        ==== Update vertical slab in H ====

501 *

502          IF( WANTT ) THEN

503             LTOP = 1

504          ELSE

505             LTOP = KTOP

506          END IF

507          DO 70 KROW = LTOP, KWTOP - 1, NV

508             KLN = MIN( NV, KWTOP-KROW )

509             CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),

510      $                  LDH, V, LDV, ZERO, WV, LDWV )

511             CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )

512    70    CONTINUE

513 *

514 *        ==== Update horizontal slab in H ====

515 *

516          IF( WANTT ) THEN

517             DO 80 KCOL = KBOT + 1, N, NH

518                KLN = MIN( NH, N-KCOL+1 )

519                CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,

520      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )

521                CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),

522      $                      LDH )

523    80       CONTINUE

524          END IF

525 *

526 *        ==== Update vertical slab in Z ====

527 *

528          IF( WANTZ ) THEN

529             DO 90 KROW = ILOZ, IHIZ, NV

530                KLN = MIN( NV, IHIZ-KROW+1 )

531                CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),

532      $                     LDZ, V, LDV, ZERO, WV, LDWV )

533                CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),

534      $                      LDZ )

535    90       CONTINUE

536          END IF

537       END IF

538 *

539 *     ==== Return the number of deflations ... ====

540 *

541       ND = JW - NS

542 *

543 *     ==== ... and the number of shifts. (Subtracting

544 *     .    INFQR from the spike length takes care

545 *     .    of the case of a rare QR failure while

546 *     .    calculating eigenvalues of the deflation

547 *     .    window.)  ====

548 *

549       NS = NS - INFQR

550 *

551 *      ==== Return optimal workspace. ====

552 *

553       WORK( 1 ) = DBLE( LWKOPT )

554 *

555 *     ==== End of DLAQR3 ====

556 *

557       END